From e64f2ab95d49757eb516dd4d4768b4a558590c5c Mon Sep 17 00:00:00 2001 From: Peter Broadhurst Date: Mon, 23 Dec 2024 13:15:47 -0500 Subject: [PATCH 01/41] Initial framework for peer mgmt code and new queued msg tables Signed-off-by: Peter Broadhurst --- config/pkg/pldconf/transportmgr.go | 11 +- ...create_private_transaction_tables.down.sql | 6 +- .../000014_peer_queued_messages.down.sql | 5 + .../000014_peer_queued_messages.up.sql | 28 +++ ...create_private_transaction_tables.down.sql | 7 +- .../000014_peer_queued_messages.down.sql | 5 + .../sqlite/000014_peer_queued_messages.up.sql | 28 +++ core/go/internal/components/transportmgr.go | 2 +- core/go/internal/transportmgr/manager.go | 63 ++---- core/go/internal/transportmgr/peer.go | 190 ++++++++++++++++++ core/go/pkg/persistence/testdb_postgres.go | 12 ++ 11 files changed, 298 insertions(+), 59 deletions(-) create mode 100644 core/go/db/migrations/postgres/000014_peer_queued_messages.down.sql create mode 100644 core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql create mode 100644 core/go/db/migrations/sqlite/000014_peer_queued_messages.down.sql create mode 100644 core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql create mode 100644 core/go/internal/transportmgr/peer.go diff --git a/config/pkg/pldconf/transportmgr.go b/config/pkg/pldconf/transportmgr.go index 72ec961bd..693164efa 100644 --- a/config/pkg/pldconf/transportmgr.go +++ b/config/pkg/pldconf/transportmgr.go @@ -14,15 +14,22 @@ */ package pldconf +import "github.com/kaleido-io/paladin/config/pkg/confutil" + type TransportManagerConfig struct { - NodeName string `json:"nodeName"` - Transports map[string]*TransportConfig `json:"transports"` + NodeName string `json:"nodeName"` + SendQueueLen *int `json:"sendQueueLen"` + Transports map[string]*TransportConfig `json:"transports"` } type TransportInitConfig struct { Retry RetryConfig `json:"retry"` } +var TransportManagerDefaults = &TransportManagerConfig{ + SendQueueLen: confutil.P(10), +} + type TransportConfig struct { Init TransportInitConfig `json:"init"` Plugin PluginConfig `json:"plugin"` diff --git a/core/go/db/migrations/postgres/000008_create_private_transaction_tables.down.sql b/core/go/db/migrations/postgres/000008_create_private_transaction_tables.down.sql index dcfd2c6d2..6937a4468 100644 --- a/core/go/db/migrations/postgres/000008_create_private_transaction_tables.down.sql +++ b/core/go/db/migrations/postgres/000008_create_private_transaction_tables.down.sql @@ -1,6 +1,6 @@ BEGIN; -DROP TABLE dispatches; -DROP TABLE state_distribution_acknowledgments; -DROP TABLE state_distributions; +DROP TABLE IF EXISTS dispatches; +DROP TABLE IF EXISTS state_distribution_acknowledgments; +DROP TABLE IF EXISTS state_distributions; COMMIT; diff --git a/core/go/db/migrations/postgres/000014_peer_queued_messages.down.sql b/core/go/db/migrations/postgres/000014_peer_queued_messages.down.sql new file mode 100644 index 000000000..a8ee6d412 --- /dev/null +++ b/core/go/db/migrations/postgres/000014_peer_queued_messages.down.sql @@ -0,0 +1,5 @@ +BEGIN; +DROP TABLE queued_msg_acks; +DROP TABLE queued_msgs; +COMMIT; + diff --git a/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql b/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql new file mode 100644 index 000000000..b05dcda22 --- /dev/null +++ b/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql @@ -0,0 +1,28 @@ +BEGIN; + +-- These tables are replaced (data is not migrated from initial state distribution specific 
implementation) +DROP TABLE state_distribution_acknowledgments; +DROP TABLE state_distributions; + +CREATE TABLE queued_msgs ( + "id" TEXT NOT NULL, + "created" BIGINT NOT NULL, + "cid" TEXT , + "node" TEXT NOT NULL, + "component" TEXT NOT NULL, + "reply_to" TEXT NOT NULL, + "type" TEXT NOT NULL, + "payload" TEXT , + PRIMARY KEY ("id") +); + +CREATE INDEX queued_msgs_node ON queued_msgs ("node"); + +CREATE TABLE queued_msg_acks ( + "id" TEXT NOT NULL, + "acked" BIGINT NOT NULL, + PRIMARY KEY ("id"), + FOREIGN KEY ("id") REFERENCES queued_msgs ("id") ON DELETE CASCADE +); + + diff --git a/core/go/db/migrations/sqlite/000008_create_private_transaction_tables.down.sql b/core/go/db/migrations/sqlite/000008_create_private_transaction_tables.down.sql index 7f7aaa276..a9a434b6d 100644 --- a/core/go/db/migrations/sqlite/000008_create_private_transaction_tables.down.sql +++ b/core/go/db/migrations/sqlite/000008_create_private_transaction_tables.down.sql @@ -1,3 +1,4 @@ -DROP TABLE dispatches; -DROP TABLE state_distribution_acknowledgments; -DROP TABLE state_distributions; +DROP TABLE IF EXISTS dispatches; +DROP TABLE IF EXISTS state_distribution_acknowledgments; +DROP TABLE IF EXISTS state_distributions; + diff --git a/core/go/db/migrations/sqlite/000014_peer_queued_messages.down.sql b/core/go/db/migrations/sqlite/000014_peer_queued_messages.down.sql new file mode 100644 index 000000000..a8ee6d412 --- /dev/null +++ b/core/go/db/migrations/sqlite/000014_peer_queued_messages.down.sql @@ -0,0 +1,5 @@ +BEGIN; +DROP TABLE queued_msg_acks; +DROP TABLE queued_msgs; +COMMIT; + diff --git a/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql b/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql new file mode 100644 index 000000000..b05dcda22 --- /dev/null +++ b/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql @@ -0,0 +1,28 @@ +BEGIN; + +-- These tables are replaced (data is not migrated from initial state distribution specific implementation) +DROP TABLE state_distribution_acknowledgments; +DROP TABLE state_distributions; + +CREATE TABLE queued_msgs ( + "id" TEXT NOT NULL, + "created" BIGINT NOT NULL, + "cid" TEXT , + "node" TEXT NOT NULL, + "component" TEXT NOT NULL, + "reply_to" TEXT NOT NULL, + "type" TEXT NOT NULL, + "payload" TEXT , + PRIMARY KEY ("id") +); + +CREATE INDEX queued_msgs_node ON queued_msgs ("node"); + +CREATE TABLE queued_msg_acks ( + "id" TEXT NOT NULL, + "acked" BIGINT NOT NULL, + PRIMARY KEY ("id"), + FOREIGN KEY ("id") REFERENCES queued_msgs ("id") ON DELETE CASCADE +); + + diff --git a/core/go/internal/components/transportmgr.go b/core/go/internal/components/transportmgr.go index 32b9d54ff..3c6d214a2 100644 --- a/core/go/internal/components/transportmgr.go +++ b/core/go/internal/components/transportmgr.go @@ -29,7 +29,7 @@ type TransportMessage struct { CorrelationID *uuid.UUID Component string // The name of the component to route the message to once it arrives at the destination node Node string // The node id to send the message to - ReplyTo string // The node id to send replies to + ReplyTo string // The identity to respond to on the sending node MessageType string Payload []byte } diff --git a/core/go/internal/transportmgr/manager.go b/core/go/internal/transportmgr/manager.go index e7acf70d6..50d877ac4 100644 --- a/core/go/internal/transportmgr/manager.go +++ b/core/go/internal/transportmgr/manager.go @@ -28,7 +28,6 @@ import ( "github.com/kaleido-io/paladin/toolkit/pkg/log" "github.com/kaleido-io/paladin/toolkit/pkg/plugintk" - 
"github.com/kaleido-io/paladin/toolkit/pkg/prototk" "github.com/kaleido-io/paladin/toolkit/pkg/rpcserver" ) @@ -47,6 +46,11 @@ type transportManager struct { destinations map[string]components.TransportClient destinationsFixed bool destinationsMux sync.RWMutex + + peersLock sync.RWMutex + peers map[string]*peer + + senderBufferLen int } func NewTransportManager(bgCtx context.Context, conf *pldconf.TransportManagerConfig) components.TransportManager { @@ -57,6 +61,7 @@ func NewTransportManager(bgCtx context.Context, conf *pldconf.TransportManagerCo transportsByID: make(map[uuid.UUID]*transport), transportsByName: make(map[string]*transport), destinations: make(map[string]components.TransportClient), + senderBufferLen: confutil.IntMin(conf.SendQueueLen, 0, *pldconf.TransportManagerDefaults.SendQueueLen), } } @@ -202,62 +207,20 @@ func (tm *transportManager) Send(ctx context.Context, msg *components.TransportM return i18n.NewError(ctx, msgs.MsgTransportInvalidMessage) } - if msg.Node == "" || msg.Node == tm.localNodeName { - return i18n.NewError(ctx, msgs.MsgTransportInvalidDestinationSend, tm.localNodeName, msg.Node) - } - if msg.ReplyTo == "" { msg.ReplyTo = tm.localNodeName } - // Note the registry is responsible for caching to make this call as efficient as if - // we maintained the transport details in-memory ourselves. - registeredTransportDetails, err := tm.registryManager.GetNodeTransports(ctx, msg.Node) + // Use or establish a peer connection for the send + peer, err := tm.getPeer(ctx, msg.Node) if err != nil { return err } - // See if any of the transports registered by the node, are configured on this local node - // Note: We just pick the first one if multiple are available, and there is no retry to - // fallback to a secondary one currently. - var transport *transport - for _, rtd := range registeredTransportDetails { - transport = tm.transportsByName[rtd.Transport] - } - if transport == nil { - // If we didn't find one, then feedback to the caller which transports were registered - registeredTransportNames := []string{} - for _, rtd := range registeredTransportDetails { - registeredTransportNames = append(registeredTransportNames, rtd.Transport) - } - return i18n.NewError(ctx, msgs.MsgTransportNoTransportsConfiguredForNode, msg.Node, registeredTransportNames) - } + // Push the send to the peer - this is a best effort interaction. + // There is some retry in the Paladin layer, and some transports provide resilience. + // However, the send is at-most-once, and the higher level message protocols that + // use this "send" must be fault tolerant to message loss. + return peer.send(ctx, msg) - // Call the selected transport to send - // Note: We do not push the transport details down to the plugin on every send, as they are very large - // (KBs of certificates and other data). - // The transport plugin uses GetTransportDetails to request them back from us, and then caches - // these internally through use of a long lived connection / connection-pool. 
- var correlID *string
- if msg.CorrelationID != nil {
- correlID = confutil.P(msg.CorrelationID.String())
- }
- var zeroUUID uuid.UUID
- if msg.MessageID == zeroUUID {
- msg.MessageID = uuid.New()
- }
- err = transport.send(ctx, &prototk.Message{
- MessageType: msg.MessageType,
- MessageId: msg.MessageID.String(),
- CorrelationId: correlID,
- Component: msg.Component,
- Node: msg.Node,
- ReplyTo: msg.ReplyTo,
- Payload: msg.Payload,
- })
- if err != nil {
- return err
- }
-
- return nil
 }
diff --git a/core/go/internal/transportmgr/peer.go b/core/go/internal/transportmgr/peer.go
new file mode 100644
index 000000000..1ad0c9518
--- /dev/null
+++ b/core/go/internal/transportmgr/peer.go
@@ -0,0 +1,190 @@
+/*
+ * Copyright © 2024 Kaleido, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package transportmgr
+
+import (
+ "cmp"
+ "context"
+ "sort"
+
+ "github.com/google/uuid"
+ "github.com/hyperledger/firefly-common/pkg/i18n"
+ "github.com/kaleido-io/paladin/config/pkg/confutil"
+ "github.com/kaleido-io/paladin/core/internal/components"
+ "github.com/kaleido-io/paladin/core/internal/msgs"
+ "github.com/kaleido-io/paladin/toolkit/pkg/log"
+ "github.com/kaleido-io/paladin/toolkit/pkg/prototk"
+)
+
+type peer struct {
+ ctx context.Context
+ cancelCtx context.CancelFunc
+
+ name string
+ tm *transportManager
+ transport *transport
+
+ persistedMsgsAvailable chan struct{}
+ sendQueue chan *prototk.Message
+
+ done chan struct{}
+}
+
+type nameSortedPeers []*peer
+
+func (p nameSortedPeers) Len() int { return len(p) }
+func (p nameSortedPeers) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
+func (p nameSortedPeers) Less(i, j int) bool { return cmp.Less(p[i].name, p[j].name) }
+
+// get a list of all active peers
+func (tm *transportManager) listActivePeers() nameSortedPeers {
+ tm.peersLock.RLock()
+ defer tm.peersLock.RUnlock()
+ peers := make(nameSortedPeers, 0, len(tm.peers))
+ for _, p := range tm.peers {
+ peers = append(peers, p)
+ }
+ sort.Sort(peers)
+ return peers
+}
+
+// efficient read-locked call to get an active peer connection
+func (tm *transportManager) getActivePeer(nodeName string) *peer {
+ tm.peersLock.RLock()
+ defer tm.peersLock.RUnlock()
+ return tm.peers[nodeName]
+}
+
+func (tm *transportManager) getPeer(ctx context.Context, nodeName string) (*peer, error) {
+
+ // Hopefully this is an already active connection
+ p := tm.getActivePeer(nodeName)
+ if p != nil {
+ // Already active and obtained via read-lock
+ log.L(ctx).Debugf("connection already active for peer '%s'", nodeName)
+ return p, nil
+ }
+
+ // Otherwise take the write-lock and race to connect
+ tm.peersLock.Lock()
+ defer tm.peersLock.Unlock()
+ p = tm.peers[nodeName]
+ if p != nil {
+ // There was a race to connect to this peer, and the other routine won
+ log.L(ctx).Debugf("connection already active for peer '%s' (after connection race)", nodeName)
+ return p, nil
+ }
+
+ // We need to resolve the node transport, and build a new connection
+ log.L(ctx).Debugf("attempting connection for peer '%s'", nodeName)
+ p = &peer{
+ tm: tm,
+ name: nodeName,
+ persistedMsgsAvailable: make(chan struct{}, 1),
+ sendQueue: make(chan *prototk.Message, tm.senderBufferLen),
+ done: make(chan struct{}),
+ }
+ p.ctx, p.cancelCtx = context.WithCancel(
+ log.WithLogField(tm.bgCtx /* goroutines need the bg context */, "peer", nodeName))
+
+ if nodeName == "" || nodeName == tm.localNodeName {
+ return nil, i18n.NewError(p.ctx, msgs.MsgTransportInvalidDestinationSend, tm.localNodeName, nodeName)
+ }
+
+ // Note the registry is responsible for caching to make this call as efficient as if
+ // we maintained the transport details in-memory ourselves.
+ registeredTransportDetails, err := tm.registryManager.GetNodeTransports(p.ctx, nodeName)
+ if err != nil {
+ return nil, err
+ }
+
+ // See if any of the transports registered by the node are configured on this local node
+ // Note: We just pick the first one if multiple are available, and there is no retry to
+ // fallback to a secondary one currently.
+ for _, rtd := range registeredTransportDetails {
+ p.transport = tm.transportsByName[rtd.Transport]
+ if p.transport != nil {
+ break // first configured transport wins, as per the comment above
+ }
+ }
+ if p.transport == nil {
+ // If we didn't find one, then feedback to the caller which transports were registered
+ registeredTransportNames := []string{}
+ for _, rtd := range registeredTransportDetails {
+ registeredTransportNames = append(registeredTransportNames, rtd.Transport)
+ }
+ return nil, i18n.NewError(p.ctx, msgs.MsgTransportNoTransportsConfiguredForNode, nodeName, registeredTransportNames)
+ }
+
+ log.L(ctx).Debugf("connected to peer '%s'", nodeName)
+ tm.peers[nodeName] = p
+ return p, nil
+}
+
+func (p *peer) notifyPersistedMsgAvailable() {
+ select {
+ case p.persistedMsgsAvailable <- struct{}{}:
+ default:
+ }
+}
+
+func (p *peer) send(ctx context.Context, msg *components.TransportMessage) error {
+
+ // Convert the message to the protobuf transport payload
+ var correlID *string
+ if msg.CorrelationID != nil {
+ correlID = confutil.P(msg.CorrelationID.String())
+ }
+ var zeroUUID uuid.UUID
+ if msg.MessageID == zeroUUID {
+ msg.MessageID = uuid.New()
+ }
+ pMsg := &prototk.Message{
+ MessageType: msg.MessageType,
+ MessageId: msg.MessageID.String(),
+ CorrelationId: correlID,
+ Component: msg.Component,
+ Node: msg.Node,
+ ReplyTo: msg.ReplyTo,
+ Payload: msg.Payload,
+ }
+
+ // Push onto the sender channel as a fire-and-forget message, for the
+ // goroutine to handle (alongside dispatching persisted messages)
+ select {
+ case p.sendQueue <- pMsg:
+ log.L(ctx).Debugf("sending %s message %s (cid=%v)", msg.MessageType, msg.MessageID, msg.CorrelationID)
+ return nil
+ case <-ctx.Done():
+ return i18n.NewError(ctx, msgs.MsgContextCanceled)
+ }
+}
+
+func (p *peer) sender() {
+ defer close(p.done)
+
+ log.L(p.ctx).Infof("peer %s active", p.name)
+
+ for {
+ select {
+ case <-p.ctx.Done():
+ log.L(p.ctx).Infof("peer %s inactive", p.name)
+ return
+ }
+ }
+}
+
+func (p *peer) close() {
+ p.cancelCtx()
+ <-p.done
+}
diff --git a/core/go/pkg/persistence/testdb_postgres.go b/core/go/pkg/persistence/testdb_postgres.go
index 501cd0981..2a91423dc 100644
--- a/core/go/pkg/persistence/testdb_postgres.go
+++ b/core/go/pkg/persistence/testdb_postgres.go
@@ -72,8 +72,20 @@ func buildReversedTableListFromMigrations() []string {
 case strings.HasSuffix(migrationFile, ".up.sql"):
 for scanner.Scan() {
 createTableMatch := createTableRegex.FindStringSubmatch(scanner.Text())
+ dropTableMatch := dropTableRegex.FindStringSubmatch(scanner.Text())
 if len(createTableMatch) == 2 {
 createTables[createTableMatch[1]] = migrationFile
+ } else if len(dropTableMatch) == 3 {
+ // Remove from create & drop list - as it's been superseded in a .up migration
+ delete(dropTables, dropTableMatch[2])
+ delete(createTables, dropTableMatch[2])
+ newDropList := make([]string, 0, len(dropList))
+ for _, t := range dropList {
+ if t != dropTableMatch[2] {
+ newDropList = append(newDropList, t)
+ }
+ }
+ dropList = newDropList
 }
 }
 case strings.HasSuffix(migrationFile, ".down.sql"):

From beae08f774338894036005fb98774e3a3189eb8f Mon Sep 17 00:00:00 2001
From: Peter Broadhurst
Date: Tue, 24 Dec 2024 12:11:45 -0500
Subject: [PATCH 02/41] Spelling for reliable delivery API

Signed-off-by: Peter Broadhurst
---
 .../000014_peer_queued_messages.up.sql | 4 +-
 .../sqlite/000014_peer_queued_messages.up.sql | 4 +-
 core/go/internal/components/transportmgr.go | 41 +++++++++++++++----
 core/go/internal/transportmgr/manager.go | 40 ++++++++++++++++--
 core/go/internal/transportmgr/peer.go | 5 ---
 5 files changed, 74 insertions(+), 20 deletions(-)

diff --git a/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql b/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql
index b05dcda22..9d8077e5c 100644
--- a/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql
+++ b/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql
@@ -11,7 +11,7 @@ CREATE TABLE queued_msgs (
 "node" TEXT NOT NULL,
 "component" TEXT NOT NULL,
 "reply_to" TEXT NOT NULL,
- "type" TEXT NOT NULL,
+ "msg_type" TEXT NOT NULL,
 "payload" TEXT ,
 PRIMARY KEY ("id")
 );
@@ -20,7 +20,7 @@ CREATE INDEX queued_msgs_node ON queued_msgs ("node");

 CREATE TABLE queued_msg_acks (
 "id" TEXT NOT NULL,
- "acked" BIGINT NOT NULL,
+ "time" BIGINT NOT NULL,
 PRIMARY KEY ("id"),
 FOREIGN KEY ("id") REFERENCES queued_msgs ("id") ON DELETE CASCADE
 );
diff --git a/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql b/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql
index b05dcda22..9d8077e5c 100644
--- a/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql
+++ b/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql
@@ -11,7 +11,7 @@ CREATE TABLE queued_msgs (
 "node" TEXT NOT NULL,
 "component" TEXT NOT NULL,
 "reply_to" TEXT NOT NULL,
- "type" TEXT NOT NULL,
+ "msg_type" TEXT NOT NULL,
 "payload" TEXT ,
 PRIMARY KEY ("id")
 );
@@ -20,7 +20,7 @@ CREATE INDEX queued_msgs_node ON queued_msgs ("node");

 CREATE TABLE queued_msg_acks (
 "id" TEXT NOT NULL,
- "acked" BIGINT NOT NULL,
+ "time" BIGINT NOT NULL,
 PRIMARY KEY ("id"),
 FOREIGN KEY ("id") REFERENCES queued_msgs ("id") ON DELETE CASCADE
 );
diff --git a/core/go/internal/components/transportmgr.go b/core/go/internal/components/transportmgr.go
index 3c6d214a2..3d2c30d04 100644
--- a/core/go/internal/components/transportmgr.go
+++ b/core/go/internal/components/transportmgr.go
@@ -19,19 +19,36 @@ import (
 "context"

 "github.com/google/uuid"
+ "gorm.io/gorm"

 "github.com/kaleido-io/paladin/config/pkg/pldconf"
 "github.com/kaleido-io/paladin/toolkit/pkg/plugintk"
+ "github.com/kaleido-io/paladin/toolkit/pkg/tktypes"
 )

+func (tma TransportMessage) TableName() string {
+ return "queued_msgs"
+}
+
 type TransportMessage struct {
- MessageID uuid.UUID
- CorrelationID *uuid.UUID
- Component string // The name of the component to route the message to once it arrives at the destination node
- Node string // The node id to send the message to
- ReplyTo string // The identity to respond to on the sending node
- MessageType string
- Payload []byte
+ MessageID uuid.UUID `json:"id" gorm:"column:id;primaryKey"`
+ Created tktypes.Timestamp `json:"created" gorm:"column:created;autoCreateTime:false"` // generated in our code
+ CorrelationID *uuid.UUID `json:"correlationId" gorm:"column:cid"`
+ Component string `json:"component" gorm:"column:component"` // The name of the component to route the message to once it arrives at the destination node
+ Node string `json:"node" gorm:"column:node"` // The node id to send the message to
+ ReplyTo string `json:"replyTo" gorm:"column:reply_to"` // The identity to respond to on the sending node
+ MessageType string `json:"messageType" gorm:"column:msg_type"`
+ Payload []byte `json:"payload" gorm:"column:payload"`
+ *TransportMessageAck `json:",inline" gorm:"foreignKey:MessageID;references:MessageID"`
+}
+
+func (tma TransportMessageAck) TableName() string {
+ return "queued_msg_acks"
+}
+
+type TransportMessageAck struct {
+ MessageID uuid.UUID `json:"-" gorm:"column:id;primaryKey"`
+ AckTime *tktypes.Timestamp `json:"ackTime,omitempty" gorm:"column:time;autoCreateTime:false"` // generated in our code
}

 type TransportManagerToTransport interface {
@@ -88,9 +105,17 @@ type TransportManager interface {
 // on delivery, and the target failing to process the message should be considered a possible
 // situation to recover from (although not critical path).
 //
- // e.g. at-most-once delivery semantics
+ // at-most-once delivery semantics
 Send(ctx context.Context, message *TransportMessage) error

+ // Sends a message with at-least-once delivery semantics
+ //
+ // The message is persisted to the DB in the supplied transaction, then sent on the wire with indefinite retry
+ // including over node restart, until an ack is returned from the remote node.
+ //
+ // The returned pre-commit handler must be called after the DB transaction commits, to trigger the delivery.
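+ //
+ // Illustrative caller flow (editor's sketch - the gorm transaction wrapper and
+ // the variable names here are assumptions, not part of this interface):
+ //
+ //	var preCommit func()
+ //	err := db.Transaction(func(dbTX *gorm.DB) (err error) {
+ //	    preCommit, err = transportMgr.SendReliable(ctx, dbTX, msg)
+ //	    return err
+ //	})
+ //	if err == nil {
+ //	    preCommit() // only once the transaction has committed - triggers delivery
+ //	}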
+ SendReliable(ctx context.Context, dbTX *gorm.DB, msg *TransportMessage) (preCommit func(), err error) + // RegisterClient registers a client to receive messages from the transport manager // messages are routed to the client based on the Destination field of the message matching the value returned from Destination() function of the TransportClient RegisterClient(ctx context.Context, client TransportClient) error diff --git a/core/go/internal/transportmgr/manager.go b/core/go/internal/transportmgr/manager.go index 50d877ac4..a932a1480 100644 --- a/core/go/internal/transportmgr/manager.go +++ b/core/go/internal/transportmgr/manager.go @@ -25,10 +25,12 @@ import ( "github.com/kaleido-io/paladin/config/pkg/pldconf" "github.com/kaleido-io/paladin/core/internal/components" "github.com/kaleido-io/paladin/core/internal/msgs" + "gorm.io/gorm" "github.com/kaleido-io/paladin/toolkit/pkg/log" "github.com/kaleido-io/paladin/toolkit/pkg/plugintk" "github.com/kaleido-io/paladin/toolkit/pkg/rpcserver" + "github.com/kaleido-io/paladin/toolkit/pkg/tktypes" ) type transportManager struct { @@ -197,14 +199,15 @@ func (tm *transportManager) LocalNodeName() string { return tm.localNodeName } -// See docs in components package -func (tm *transportManager) Send(ctx context.Context, msg *components.TransportMessage) error { +func (tm *transportManager) prepareNewMessage(ctx context.Context, msg *components.TransportMessage) (*peer, error) { + msg.Created = tktypes.TimestampNow() + msg.MessageID = uuid.New() // Check the message is valid if len(msg.MessageType) == 0 || len(msg.Payload) == 0 { log.L(ctx).Errorf("Invalid message send request %+v", msg) - return i18n.NewError(ctx, msgs.MsgTransportInvalidMessage) + return nil, i18n.NewError(ctx, msgs.MsgTransportInvalidMessage) } if msg.ReplyTo == "" { @@ -213,6 +216,17 @@ func (tm *transportManager) Send(ctx context.Context, msg *components.TransportM // Use or establish a peer connection for the send peer, err := tm.getPeer(ctx, msg.Node) + if err != nil { + return nil, err + } + + return peer, nil +} + +// See docs in components package +func (tm *transportManager) Send(ctx context.Context, msg *components.TransportMessage) error { + + peer, err := tm.prepareNewMessage(ctx, msg) if err != nil { return err } @@ -224,3 +238,23 @@ func (tm *transportManager) Send(ctx context.Context, msg *components.TransportM return peer.send(ctx, msg) } + +// See docs in components package +func (tm *transportManager) SendReliable(ctx context.Context, dbTX *gorm.DB, msg *components.TransportMessage) (preCommit func(), err error) { + + peer, err := tm.prepareNewMessage(ctx, msg) + if err != nil { + return nil, err + } + + err = dbTX. + WithContext(ctx). + Create(msg). 
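+ // the queued message row commits (or rolls back) atomically with the caller's other writes in dbTX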
+ Error
+ if err != nil {
+ return nil, err
+ }
+
+ return peer.notifyPersistedMsgAvailable, nil
+
+}
diff --git a/core/go/internal/transportmgr/peer.go b/core/go/internal/transportmgr/peer.go
index 1ad0c9518..c415c083a 100644
--- a/core/go/internal/transportmgr/peer.go
+++ b/core/go/internal/transportmgr/peer.go
@@ -20,7 +20,6 @@ import (
 "context"
 "sort"

- "github.com/google/uuid"
 "github.com/hyperledger/firefly-common/pkg/i18n"
 "github.com/kaleido-io/paladin/config/pkg/confutil"
 "github.com/kaleido-io/paladin/core/internal/components"
 "github.com/kaleido-io/paladin/core/internal/msgs"
@@ -145,10 +144,6 @@ func (p *peer) send(ctx context.Context, msg *components.TransportMessage) error
 if msg.CorrelationID != nil {
 correlID = confutil.P(msg.CorrelationID.String())
 }
- var zeroUUID uuid.UUID
- if msg.MessageID == zeroUUID {
- msg.MessageID = uuid.New()
- }
 pMsg := &prototk.Message{
 MessageType: msg.MessageType,
 MessageId: msg.MessageID.String(),

From 9f34a57a14cb3ae35b97dd6ad54f60edceab77ab Mon Sep 17 00:00:00 2001
From: Peter Broadhurst
Date: Fri, 27 Dec 2024 11:40:35 -0500
Subject: [PATCH 03/41] Update transport proto interface for activate/deactivate lifecycle

Signed-off-by: Peter Broadhurst
---
 .../000014_peer_queued_messages.up.sql | 1 +
 .../sqlite/000014_peer_queued_messages.up.sql | 1 +
 core/go/internal/transportmgr/manager.go | 8 +-
 core/go/internal/transportmgr/peer.go | 26 ++--
 .../go/pkg/plugintk/plugin_type_transport.go | 20 +++
 .../plugintk/plugin_type_transport_test.go | 54 +++++--
 toolkit/proto/protos/service.proto | 5 +
 toolkit/proto/protos/to_transport.proto | 16 ++
 .../grpc/internal/grpctransport/config.go | 4 +
 .../internal/grpctransport/grpc_transport.go | 146 ++++++------------
 .../grpctransport/grpc_transport_test.go | 67 ++------
 .../internal/grpctransport/outbound_conn.go | 113 ++++++++++++++
 .../grpctransport/tls_verifier_test.go | 138 ++++++++--------
 transports/grpc/internal/msgs/en_errors.go | 3 +
 14 files changed, 360 insertions(+), 242 deletions(-)
 create mode 100644 transports/grpc/internal/grpctransport/outbound_conn.go

diff --git a/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql b/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql
index 9d8077e5c..284d1ba10 100644
--- a/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql
+++ b/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql
@@ -17,6 +17,7 @@ CREATE TABLE queued_msgs (
 );

 CREATE INDEX queued_msgs_node ON queued_msgs ("node");
+CREATE INDEX queued_msgs_created ON queued_msgs ("created");

 CREATE TABLE queued_msg_acks (
 "id" TEXT NOT NULL,
diff --git a/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql b/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql
index 9d8077e5c..284d1ba10 100644
--- a/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql
+++ b/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql
@@ -17,6 +17,7 @@ CREATE TABLE queued_msgs (
 );

 CREATE INDEX queued_msgs_node ON queued_msgs ("node");
+CREATE INDEX queued_msgs_created ON queued_msgs ("created");

 CREATE TABLE queued_msg_acks (
 "id" TEXT NOT NULL,
diff --git a/core/go/internal/transportmgr/manager.go b/core/go/internal/transportmgr/manager.go
index a932a1480..fe09a4c2e 100644
--- a/core/go/internal/transportmgr/manager.go
+++ b/core/go/internal/transportmgr/manager.go
@@ -235,7 +235,13 @@ func (tm *transportManager) Send(ctx context.Context, msg *components.TransportM
 // There is some retry in the Paladin layer, and some transports provide resilience.
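 // (e.g. the gRPC transport later in this series drops a failed send stream and
 // re-establishes it on the next send attempt - see ensureStream in outbound_conn.go)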
// However, the send is at-most-once, and the higher level message protocols that // use this "send" must be fault tolerant to message loss. - return peer.send(ctx, msg) + select { + case peer.sendQueue <- msg: + log.L(ctx).Debugf("queued %s message %s (cid=%v) to %s", msg.MessageType, msg.MessageID, msg.CorrelationID, peer.name) + return nil + case <-ctx.Done(): + return i18n.NewError(ctx, msgs.MsgContextCanceled) + } } diff --git a/core/go/internal/transportmgr/peer.go b/core/go/internal/transportmgr/peer.go index c415c083a..da98b513a 100644 --- a/core/go/internal/transportmgr/peer.go +++ b/core/go/internal/transportmgr/peer.go @@ -37,7 +37,7 @@ type peer struct { transport *transport persistedMsgsAvailable chan struct{} - sendQueue chan *prototk.Message + sendQueue chan *components.TransportMessage done chan struct{} } @@ -93,7 +93,7 @@ func (tm *transportManager) getPeer(ctx context.Context, nodeName string) (*peer tm: tm, name: nodeName, persistedMsgsAvailable: make(chan struct{}, 1), - sendQueue: make(chan *prototk.Message, tm.senderBufferLen), + sendQueue: make(chan *components.TransportMessage, tm.senderBufferLen), done: make(chan struct{}), } p.ctx, p.cancelCtx = context.WithCancel( @@ -153,16 +153,8 @@ func (p *peer) send(ctx context.Context, msg *components.TransportMessage) error ReplyTo: msg.ReplyTo, Payload: msg.Payload, } + return p.transport.send(ctx, pMsg) - // Push onto the sender channel as a fire-and-forget message, for the - // goroutine to handle (alongside dispatching persisted messages) - select { - case p.sendQueue <- pMsg: - log.L(ctx).Debugf("sending %s message %s (cid=%v)", msg.MessageType, msg.MessageID, msg.CorrelationID) - return nil - case <-ctx.Done(): - return i18n.NewError(ctx, msgs.MsgContextCanceled) - } } func (p *peer) sender() { @@ -170,11 +162,23 @@ func (p *peer) sender() { log.L(p.ctx).Infof("peer %s active", p.name) + var persistedStale bool + var persistedPage []*components.TransportMessage for { + + var nextMessage *components.TransportMessage + + if len(persistedPage) > 0 { + + } + select { case <-p.ctx.Done(): log.L(p.ctx).Infof("peer %s inactive", p.name) return + case <-p.persistedMsgsAvailable: + persistedStale = true + case nextMessage = <-p.sendQueue: } } } diff --git a/toolkit/go/pkg/plugintk/plugin_type_transport.go b/toolkit/go/pkg/plugintk/plugin_type_transport.go index cc08dfc52..1c7b3ffa1 100644 --- a/toolkit/go/pkg/plugintk/plugin_type_transport.go +++ b/toolkit/go/pkg/plugintk/plugin_type_transport.go @@ -28,6 +28,8 @@ type TransportAPI interface { ConfigureTransport(context.Context, *prototk.ConfigureTransportRequest) (*prototk.ConfigureTransportResponse, error) SendMessage(context.Context, *prototk.SendMessageRequest) (*prototk.SendMessageResponse, error) GetLocalDetails(context.Context, *prototk.GetLocalDetailsRequest) (*prototk.GetLocalDetailsResponse, error) + ActivateNode(context.Context, *prototk.ActivateNodeRequest) (*prototk.ActivateNodeResponse, error) + DeactivateNode(context.Context, *prototk.DeactivateNodeRequest) (*prototk.DeactivateNodeResponse, error) } type TransportCallbacks interface { @@ -128,6 +130,14 @@ func (th *transportHandler) RequestToPlugin(ctx context.Context, iReq PluginMess resMsg := &prototk.TransportMessage_GetLocalDetailsRes{} resMsg.GetLocalDetailsRes, err = th.api.GetLocalDetails(ctx, input.GetLocalDetails) res.ResponseFromTransport = resMsg + case *prototk.TransportMessage_ActivateNode: + resMsg := &prototk.TransportMessage_ActivateNodeRes{} + resMsg.ActivateNodeRes, err = 
th.api.ActivateNode(ctx, input.ActivateNode) + res.ResponseFromTransport = resMsg + case *prototk.TransportMessage_DeactivateNode: + resMsg := &prototk.TransportMessage_DeactivateNodeRes{} + resMsg.DeactivateNodeRes, err = th.api.DeactivateNode(ctx, input.DeactivateNode) + res.ResponseFromTransport = resMsg default: err = i18n.NewError(ctx, tkmsgs.MsgPluginUnsupportedRequest, input) } @@ -160,6 +170,8 @@ type TransportAPIFunctions struct { ConfigureTransport func(context.Context, *prototk.ConfigureTransportRequest) (*prototk.ConfigureTransportResponse, error) SendMessage func(context.Context, *prototk.SendMessageRequest) (*prototk.SendMessageResponse, error) GetLocalDetails func(context.Context, *prototk.GetLocalDetailsRequest) (*prototk.GetLocalDetailsResponse, error) + ActivateNode func(context.Context, *prototk.ActivateNodeRequest) (*prototk.ActivateNodeResponse, error) + DeactivateNode func(context.Context, *prototk.DeactivateNodeRequest) (*prototk.DeactivateNodeResponse, error) } type TransportAPIBase struct { @@ -177,3 +189,11 @@ func (tb *TransportAPIBase) SendMessage(ctx context.Context, req *prototk.SendMe func (tb *TransportAPIBase) GetLocalDetails(ctx context.Context, req *prototk.GetLocalDetailsRequest) (*prototk.GetLocalDetailsResponse, error) { return callPluginImpl(ctx, req, tb.Functions.GetLocalDetails) } + +func (tb *TransportAPIBase) ActivateNode(ctx context.Context, req *prototk.ActivateNodeRequest) (*prototk.ActivateNodeResponse, error) { + return callPluginImpl(ctx, req, tb.Functions.ActivateNode) +} + +func (tb *TransportAPIBase) DeactivateNode(ctx context.Context, req *prototk.DeactivateNodeRequest) (*prototk.DeactivateNodeResponse, error) { + return callPluginImpl(ctx, req, tb.Functions.DeactivateNode) +} diff --git a/toolkit/go/pkg/plugintk/plugin_type_transport_test.go b/toolkit/go/pkg/plugintk/plugin_type_transport_test.go index f5ca7985d..155e54483 100644 --- a/toolkit/go/pkg/plugintk/plugin_type_transport_test.go +++ b/toolkit/go/pkg/plugintk/plugin_type_transport_test.go @@ -62,29 +62,29 @@ func setupTransportTests(t *testing.T) (context.Context, *pluginExerciser[protot } } -func TestTransportCallback_GetTransportDetails(t *testing.T) { +func TestTransportCallback_ReceiveMessage(t *testing.T) { ctx, _, _, callbacks, inOutMap, done := setupTransportTests(t) defer done() - inOutMap[fmt.Sprintf("%T", &prototk.TransportMessage_GetTransportDetails{})] = func(dm *prototk.TransportMessage) { - dm.ResponseToTransport = &prototk.TransportMessage_GetTransportDetailsRes{ - GetTransportDetailsRes: &prototk.GetTransportDetailsResponse{}, + inOutMap[fmt.Sprintf("%T", &prototk.TransportMessage_ReceiveMessage{})] = func(dm *prototk.TransportMessage) { + dm.ResponseToTransport = &prototk.TransportMessage_ReceiveMessageRes{ + ReceiveMessageRes: &prototk.ReceiveMessageResponse{}, } } - _, err := callbacks.GetTransportDetails(ctx, &prototk.GetTransportDetailsRequest{}) + _, err := callbacks.ReceiveMessage(ctx, &prototk.ReceiveMessageRequest{}) require.NoError(t, err) } -func TestTransportCallback_ReceiveMessage(t *testing.T) { +func TestTransportCallback_GetTransportDetails(t *testing.T) { ctx, _, _, callbacks, inOutMap, done := setupTransportTests(t) defer done() - inOutMap[fmt.Sprintf("%T", &prototk.TransportMessage_ReceiveMessage{})] = func(dm *prototk.TransportMessage) { - dm.ResponseToTransport = &prototk.TransportMessage_ReceiveMessageRes{ - ReceiveMessageRes: &prototk.ReceiveMessageResponse{}, + inOutMap[fmt.Sprintf("%T", &prototk.TransportMessage_GetTransportDetails{})] = 
func(dm *prototk.TransportMessage) {
+ dm.ResponseToTransport = &prototk.TransportMessage_GetTransportDetailsRes{
+ GetTransportDetailsRes: &prototk.GetTransportDetailsResponse{},
 }
 }
- _, err := callbacks.ReceiveMessage(ctx, &prototk.ReceiveMessageRequest{})
+ _, err := callbacks.GetTransportDetails(ctx, &prototk.GetTransportDetailsRequest{})
 require.NoError(t, err)
 }

@@ -139,6 +139,40 @@ func TestTransportFunction_GetLocalDetails(t *testing.T) {
 })
 }

+func TestTransportFunction_ActivateNode(t *testing.T) {
+ _, exerciser, funcs, _, _, done := setupTransportTests(t)
+ defer done()
+
+ // ActivateNode - paladin to transport
+ funcs.ActivateNode = func(ctx context.Context, cdr *prototk.ActivateNodeRequest) (*prototk.ActivateNodeResponse, error) {
+ return &prototk.ActivateNodeResponse{}, nil
+ }
+ exerciser.doExchangeToPlugin(func(req *prototk.TransportMessage) {
+ req.RequestToTransport = &prototk.TransportMessage_ActivateNode{
+ ActivateNode: &prototk.ActivateNodeRequest{},
+ }
+ }, func(res *prototk.TransportMessage) {
+ assert.IsType(t, &prototk.TransportMessage_ActivateNodeRes{}, res.ResponseFromTransport)
+ })
+}
+
+func TestTransportFunction_DeactivateNode(t *testing.T) {
+ _, exerciser, funcs, _, _, done := setupTransportTests(t)
+ defer done()
+
+ // DeactivateNode - paladin to transport
+ funcs.DeactivateNode = func(ctx context.Context, cdr *prototk.DeactivateNodeRequest) (*prototk.DeactivateNodeResponse, error) {
+ return &prototk.DeactivateNodeResponse{}, nil
+ }
+ exerciser.doExchangeToPlugin(func(req *prototk.TransportMessage) {
+ req.RequestToTransport = &prototk.TransportMessage_DeactivateNode{
+ DeactivateNode: &prototk.DeactivateNodeRequest{},
+ }
+ }, func(res *prototk.TransportMessage) {
+ assert.IsType(t, &prototk.TransportMessage_DeactivateNodeRes{}, res.ResponseFromTransport)
+ })
+}
+
 func TestTransportRequestError(t *testing.T) {
 _, exerciser, _, _, _, done := setupTransportTests(t)
 defer done()
diff --git a/toolkit/proto/protos/service.proto b/toolkit/proto/protos/service.proto
index 28909604c..387588e30 100644
--- a/toolkit/proto/protos/service.proto
+++ b/toolkit/proto/protos/service.proto
@@ -105,12 +105,16 @@ message TransportMessage {
 ConfigureTransportRequest configure_transport = 1010;
 SendMessageRequest send_message = 1020;
 GetLocalDetailsRequest get_local_details = 1030;
+ ActivateNodeRequest activate_node = 1040;
+ DeactivateNodeRequest deactivate_node = 1050;
 }

 oneof response_from_transport {
 ConfigureTransportResponse configure_transport_res = 1011;
 SendMessageResponse send_message_res = 1021;
 GetLocalDetailsResponse get_local_details_res = 1031;
+ ActivateNodeResponse activate_node_res = 1041;
+ DeactivateNodeResponse deactivate_node_res = 1051;
 }

 // Request/reply exchanges initiated by the transport, to the paladin node
@@ -123,6 +127,7 @@ message TransportMessage {
 GetTransportDetailsResponse get_transport_details_res = 2011;
 ReceiveMessageResponse receive_message_res = 2021;
 }
+
 }

 message RegistryMessage {
diff --git a/toolkit/proto/protos/to_transport.proto b/toolkit/proto/protos/to_transport.proto
index 4d71150ea..0aa00d3ab 100644
--- a/toolkit/proto/protos/to_transport.proto
+++ b/toolkit/proto/protos/to_transport.proto
@@ -32,6 +32,22 @@ message SendMessageRequest {
 message SendMessageResponse {
 }

+message ActivateNodeRequest {
+ string node_name = 1;
+ string transport_details = 2;
+}
+
+message ActivateNodeResponse {
+ string peer_info_json = 1;
+}
+
+message DeactivateNodeRequest {
+ string node_name = 1;
+}
+
+message DeactivateNodeResponse
{ +} + message GetLocalDetailsRequest { } diff --git a/transports/grpc/internal/grpctransport/config.go b/transports/grpc/internal/grpctransport/config.go index 7a7700831..fe3115352 100644 --- a/transports/grpc/internal/grpctransport/config.go +++ b/transports/grpc/internal/grpctransport/config.go @@ -54,3 +54,7 @@ type PublishedTransportDetails struct { // - must be the direct parent (not the root of a chain - for that use normal CA verification) Issuers string `json:"issuers,omitempty"` } + +type PeerInfo struct { + Endpoint string `json:"endpoint"` +} diff --git a/transports/grpc/internal/grpctransport/grpc_transport.go b/transports/grpc/internal/grpctransport/grpc_transport.go index 6e42246c8..c345adf8a 100644 --- a/transports/grpc/internal/grpctransport/grpc_transport.go +++ b/transports/grpc/internal/grpctransport/grpc_transport.go @@ -59,19 +59,10 @@ type grpcTransport struct { localCertificate *tls.Certificate conf Config - connLock sync.Cond + connLock sync.RWMutex outboundConnections map[string]*outboundConn } -type outboundConn struct { - nodeName string - connecting bool - sendLock sync.Mutex - waiting int - connError error - stream grpc.ClientStreamingClient[proto.Message, proto.Empty] -} - func NewPlugin(ctx context.Context) plugintk.PluginBase { return plugintk.NewTransport(NewGRPCTransport) } @@ -80,15 +71,14 @@ func NewGRPCTransport(callbacks plugintk.TransportCallbacks) plugintk.TransportA return &grpcTransport{ bgCtx: context.Background(), callbacks: callbacks, - connLock: *sync.NewCond(new(sync.Mutex)), outboundConnections: make(map[string]*outboundConn), } } func (t *grpcTransport) ConfigureTransport(ctx context.Context, req *prototk.ConfigureTransportRequest) (*prototk.ConfigureTransportResponse, error) { // Hold the connlock while setting our state (as we'll read it when creating new conns) - t.connLock.L.Lock() - defer t.connLock.L.Unlock() + t.connLock.Lock() + defer t.connLock.Unlock() t.name = req.Name @@ -242,85 +232,47 @@ func (t *grpcTransport) getTransportDetails(ctx context.Context, node string) (t return transportDetails, nil } -func (t *grpcTransport) waitExistingOrNewConn(nodeName string) (bool, *outboundConn, error) { - t.connLock.L.Lock() - defer t.connLock.L.Unlock() - existing := t.outboundConnections[nodeName] +func (t *grpcTransport) ActivateNode(ctx context.Context, req *prototk.ActivateNodeRequest) (*prototk.ActivateNodeResponse, error) { + t.connLock.Lock() + defer t.connLock.Unlock() + + existing := t.outboundConnections[req.NodeName] if existing != nil { - // Multiple routines might try to connect concurrently, so we have a condition - existing.waiting++ - for existing.connecting { - t.connLock.Wait() - } - return false, existing, existing.connError + // Replace an existing connection - unexpected as Paladin shouldn't do this + log.L(ctx).Warnf("replacing existing activation for node '%s'", req.NodeName) + existing.close(ctx) + delete(t.outboundConnections, req.NodeName) } - // We need to create the connection - put the placeholder in the map - newConn := &outboundConn{nodeName: nodeName, connecting: true} - t.outboundConnections[nodeName] = newConn - return true, newConn, nil -} - -func (t *grpcTransport) send(ctx context.Context, oc *outboundConn, message *proto.Message) (err error) { - oc.sendLock.Lock() - defer func() { - if err != nil { - // Close this stream and remove it before dropping the lock (unsafe to call concurrent to send) - log.L(ctx).Errorf("closing stream to %s due to send err: %s", oc.nodeName, err) - _ = 
oc.stream.CloseSend()
- // Drop the send lock before taking conn lock to remove from the connections
- oc.sendLock.Unlock()
- t.connLock.L.Lock()
- defer t.connLock.L.Unlock()
- delete(t.outboundConnections, oc.nodeName)
- } else {
- // Just drop the lock and return
- oc.sendLock.Unlock()
- }
- }()
- err = oc.stream.Send(message)
- return
+ oc, peerInfoJSON, err := t.newConnection(ctx, req.NodeName, req.TransportDetails)
+ if err != nil {
+ return nil, err
+ }
+ t.outboundConnections[req.NodeName] = oc
+ return &prototk.ActivateNodeResponse{
+ PeerInfoJson: string(peerInfoJSON),
+ }, nil
}

-func (t *grpcTransport) getConnection(ctx context.Context, nodeName string) (*outboundConn, error) {
+func (t *grpcTransport) DeactivateNode(ctx context.Context, req *prototk.DeactivateNodeRequest) (*prototk.DeactivateNodeResponse, error) {
+ t.connLock.Lock()
+ defer t.connLock.Unlock()

- isNew, oc, err := t.waitExistingOrNewConn(nodeName)
- if !isNew || err != nil {
- return oc, err
+ existing := t.outboundConnections[req.NodeName]
+ if existing != nil {
+ // Close out the connection for the node being deactivated
+ log.L(ctx).Infof("deactivating node '%s' - closing connection", req.NodeName)
+ existing.close(ctx)
+ delete(t.outboundConnections, req.NodeName)
 }

- // We must ensure we complete the newConn (for good or bad)
- // and notify everyone waiting to check status before we return
- defer func() {
- t.connLock.L.Lock()
- oc.connecting = false
- if err != nil {
- // copy our error to anyone queuing - everybody fails
- oc.connError = err
- // remove this entry, so the next one will try again
- delete(t.outboundConnections, nodeName)
- }
- t.connLock.Broadcast()
- t.connLock.L.Unlock()
- }()
+ return &prototk.DeactivateNodeResponse{}, nil
+}

- // We need to get the connection details
- transportDetails, err := t.getTransportDetails(ctx, nodeName)
- if err != nil {
- return nil, err
- }
+func (t *grpcTransport) getConnection(nodeName string) *outboundConn {
+ t.connLock.RLock()
+ defer t.connLock.RUnlock()

- // Ok - try connecting
- log.L(ctx).Infof("GRPC connecting to new peer %s (endpoint=%s)", nodeName, transportDetails.Endpoint)
- individualNodeVerifier := t.peerVerifier.Clone().(*tlsVerifier)
- individualNodeVerifier.expectedNode = nodeName
- conn, err := grpc.NewClient(transportDetails.Endpoint,
- grpc.WithTransportCredentials(individualNodeVerifier),
- )
- if err == nil {
- client := proto.NewPaladinGRPCTransportClient(conn)
- oc.stream, err = client.ConnectSendStream(ctx)
- }
- return oc, err
+ return t.outboundConnections[nodeName]
 }

 func (t *grpcTransport) SendMessage(ctx context.Context, req *prototk.SendMessageRequest) (*prototk.SendMessageResponse, error) {
@@ -328,20 +280,22 @@ func (t *grpcTransport) SendMessage(ctx context.Context, req *prototk.SendMessag
 if req.Message.Node == "" {
 return nil, i18n.NewError(ctx, msgs.MsgErrorNoTargetNode)
 }
- oc, err := t.getConnection(ctx, msg.Node)
- if err == nil {
- log.L(ctx).Infof("GRPC sending message id=%s cid=%v component=%s messageType=%s replyTo=%s to peer %s",
- msg.MessageId, msg.CorrelationId, msg.Component, msg.MessageType, msg.ReplyTo, msg.Node)
- err = t.send(ctx, oc, &proto.Message{
- MessageId: msg.MessageId,
- CorrelationId: msg.CorrelationId,
- Component: msg.Component,
- Node: msg.Node,
- ReplyTo: msg.ReplyTo,
- MessageType: msg.MessageType,
- Payload: msg.Payload,
- })
+ oc := t.getConnection(msg.Node)
+ if oc == nil {
+ // This is an error in the Paladin layer
+ return nil, i18n.NewError(ctx,
msgs.MsgNodeNotActive, msg.Node) } + log.L(ctx).Infof("GRPC sending message id=%s cid=%v component=%s messageType=%s replyTo=%s to peer %s", + msg.MessageId, msg.CorrelationId, msg.Component, msg.MessageType, msg.ReplyTo, msg.Node) + err := oc.send(&proto.Message{ + MessageId: msg.MessageId, + CorrelationId: msg.CorrelationId, + Component: msg.Component, + Node: msg.Node, + ReplyTo: msg.ReplyTo, + MessageType: msg.MessageType, + Payload: msg.Payload, + }) if err != nil { return nil, err } diff --git a/transports/grpc/internal/grpctransport/grpc_transport_test.go b/transports/grpc/internal/grpctransport/grpc_transport_test.go index e5be78141..0df81880c 100644 --- a/transports/grpc/internal/grpctransport/grpc_transport_test.go +++ b/transports/grpc/internal/grpctransport/grpc_transport_test.go @@ -20,7 +20,6 @@ import ( "fmt" "net" "testing" - "time" "github.com/kaleido-io/paladin/toolkit/pkg/prototk" "github.com/stretchr/testify/assert" @@ -181,14 +180,18 @@ func TestConnectFail(t *testing.T) { plugin2.grpcServer.Stop() - _, err := plugin1.SendMessage(ctx, &prototk.SendMessageRequest{ - Message: &prototk.Message{ - ReplyTo: "node1", - Component: "to.you", - Node: "node2", - }, - }) - assert.Regexp(t, "rpc error", err) + // gRPC does not guarantee we get the error immediately + var err error + for err == nil { + _, err = plugin1.SendMessage(ctx, &prototk.SendMessageRequest{ + Message: &prototk.Message{ + ReplyTo: "node1", + Component: "to.you", + Node: "node2", + }, + }) + } + assert.Error(t, err) } @@ -196,21 +199,12 @@ func TestConnectBadTransport(t *testing.T) { ctx := context.Background() - plugin1, _, done := newSuccessfulVerifiedConnection(t, func(callbacks1, _ *testCallbacks) { - callbacks1.getTransportDetails = func(ctx context.Context, gtdr *prototk.GetTransportDetailsRequest) (*prototk.GetTransportDetailsResponse, error) { - return &prototk.GetTransportDetailsResponse{ - TransportDetails: `{"endpoint": "WRONG:::::::"}`, - }, nil - } - }) + plugin1, _, done := newSuccessfulVerifiedConnection(t) defer done() - _, err := plugin1.SendMessage(ctx, &prototk.SendMessageRequest{ - Message: &prototk.Message{ - ReplyTo: "node1", - Component: "to.you", - Node: "node2", - }, + _, err := plugin1.ActivateNode(ctx, &prototk.ActivateNodeRequest{ + NodeName: "node2", + TransportDetails: `{"endpoint": "WRONG:::::::"}`, }) assert.Regexp(t, "WRONG", err) @@ -270,32 +264,3 @@ func TestConnectSendStreamBadSecurityCtx(t *testing.T) { } assert.Error(t, err) } - -func TestWaitNewConn(t *testing.T) { - - plugin, _, _, done := newTestGRPCTransport(t, "", "", &Config{}) - defer done() - - isNew, oc, err := plugin.waitExistingOrNewConn("node1") - assert.True(t, isNew) - assert.Nil(t, err) - - bgError := make(chan error) - go func() { - _, _, err := plugin.waitExistingOrNewConn("node1") - bgError <- err - }() - - for oc.waiting == 0 { - time.Sleep(1 * time.Millisecond) - } - - plugin.connLock.L.Lock() - oc.connecting = false - oc.connError = fmt.Errorf("pop") - plugin.connLock.Broadcast() - plugin.connLock.L.Unlock() - - assert.Regexp(t, "pop", <-bgError) - -} diff --git a/transports/grpc/internal/grpctransport/outbound_conn.go b/transports/grpc/internal/grpctransport/outbound_conn.go new file mode 100644 index 000000000..30773bb2f --- /dev/null +++ b/transports/grpc/internal/grpctransport/outbound_conn.go @@ -0,0 +1,113 @@ +/* + * Copyright © 2024 Kaleido, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +package grpctransport + +import ( + "context" + "encoding/json" + "sync" + + "github.com/hyperledger/firefly-common/pkg/i18n" + "github.com/kaleido-io/paladin/toolkit/pkg/log" + "github.com/kaleido-io/paladin/transports/grpc/internal/msgs" + "github.com/kaleido-io/paladin/transports/grpc/pkg/proto" + "google.golang.org/grpc" +) + +type outboundConn struct { + t *grpcTransport + nodeName string + client proto.PaladinGRPCTransportClient + peerInfo PeerInfo + sendLock sync.Mutex + waiting int + connError error + stream grpc.ClientStreamingClient[proto.Message, proto.Empty] +} + +func (t *grpcTransport) newConnection(ctx context.Context, nodeName string, transportDetailsJSON string) (oc *outboundConn, peerInfoJSON []byte, err error) { + + // Parse the connection details + var transportDetails PublishedTransportDetails + err = json.Unmarshal([]byte(transportDetailsJSON), &transportDetails) + if err == nil { + oc = &outboundConn{ + t: t, + nodeName: nodeName, + peerInfo: PeerInfo{ + Endpoint: transportDetails.Endpoint, + }, + } + peerInfoJSON, err = json.Marshal(&oc.peerInfo) + } + if err != nil { + return nil, nil, i18n.WrapError(ctx, err, msgs.MsgInvalidTransportDetails, nodeName) + } + + // Create the gRPC connection (it's not actually connected until we use it) + individualNodeVerifier := oc.t.peerVerifier.Clone().(*tlsVerifier) + individualNodeVerifier.expectedNode = oc.nodeName + grpcConn, err := grpc.NewClient(transportDetails.Endpoint, + grpc.WithTransportCredentials(individualNodeVerifier), + ) + if err == nil { + oc.client = proto.NewPaladinGRPCTransportClient(grpcConn) + err = oc.ensureStream() + } + if err != nil { + return nil, nil, i18n.WrapError(ctx, err, msgs.MsgConnectionFailed, transportDetails.Endpoint) + } + + return oc, peerInfoJSON, nil +} + +func (oc *outboundConn) close(ctx context.Context) { + oc.sendLock.Lock() + defer oc.sendLock.Unlock() + + log.L(ctx).Errorf("cleaning up connection to %s", oc.nodeName) + + if oc.stream != nil { + _ = oc.stream.CloseSend() + oc.stream = nil + } +} + +func (oc *outboundConn) ensureStream() (err error) { + if oc.stream != nil { + return nil + } + log.L(oc.t.bgCtx).Infof("GRPC establishing new stream to peer %s (endpoint=%s)", oc.nodeName, oc.peerInfo.Endpoint) + oc.stream, err = oc.client.ConnectSendStream(oc.t.bgCtx) + return err +} + +func (oc *outboundConn) send(message *proto.Message) (err error) { + oc.sendLock.Lock() + defer oc.sendLock.Unlock() + + if err := oc.ensureStream(); err != nil { + return err + } + + err = oc.stream.Send(message) + if err != nil { + // Clean up the stream - we'll create a new one on next send + _ = oc.stream.CloseSend() + oc.stream = nil + } + return err +} diff --git a/transports/grpc/internal/grpctransport/tls_verifier_test.go b/transports/grpc/internal/grpctransport/tls_verifier_test.go index 9be05c550..6d7a9a8cc 100644 --- a/transports/grpc/internal/grpctransport/tls_verifier_test.go +++ b/transports/grpc/internal/grpctransport/tls_verifier_test.go @@ -175,12 +175,36 @@ func newSuccessfulVerifiedConnection(t *testing.T, setup 
...func(callbacks1, cal fn(callbacks1, callbacks2) } + deactivate := testActivateNode(t, plugin1, "node2", transportDetails2) + return plugin1, plugin2, func() { + deactivate() done1() done2() } } +func testActivateNode(t *testing.T, sender *grpcTransport, remoteNodeName string, transportDetails *PublishedTransportDetails) func() { + + ctx := context.Background() + + res, err := sender.ActivateNode(ctx, &prototk.ActivateNodeRequest{ + NodeName: remoteNodeName, + TransportDetails: tktypes.JSONString(transportDetails).Pretty(), + }) + assert.NoError(t, err) + assert.NotNil(t, res) + + return func() { + res, err := sender.DeactivateNode(ctx, &prototk.DeactivateNodeRequest{ + NodeName: remoteNodeName, + }) + assert.NoError(t, err) + assert.NotNil(t, res) + } + +} + func TestGRPCTransport_DirectCertVerification_OK(t *testing.T) { ctx := context.Background() @@ -246,6 +270,8 @@ func TestGRPCTransport_DirectCertVerificationWithKeyRotation_OK(t *testing.T) { } // Connect and send from plugin1 to plugin2 + deactivate := testActivateNode(t, plugin1, "node2", transportDetails2) + defer deactivate() sendRes, err := plugin1.SendMessage(ctx, &prototk.SendMessageRequest{ Message: &prototk.Message{ ReplyTo: "node1", @@ -301,6 +327,8 @@ func TestGRPCTransport_CACertVerificationWithSubjectRegex_OK(t *testing.T) { mockRegistry(callbacks2, ptds) // Connect and send from plugin1 to plugin2 + deactivate := testActivateNode(t, plugin1, "node2", transportDetails2) + defer deactivate() sendRes, err := plugin1.SendMessage(ctx, &prototk.SendMessageRequest{ Message: &prototk.Message{ ReplyTo: "node1", @@ -346,12 +374,9 @@ func TestGRPCTransport_CAServerWrongCA(t *testing.T) { mockRegistry(callbacks1, ptds) mockRegistry(callbacks2, ptds) - _, err = plugin1.SendMessage(ctx, &prototk.SendMessageRequest{ - Message: &prototk.Message{ - ReplyTo: "node1", - Component: "to.you", - Node: "node2", - }, + _, err = plugin1.ActivateNode(ctx, &prototk.ActivateNodeRequest{ + NodeName: "node2", + TransportDetails: tktypes.JSONString(transportDetails2).Pretty(), }) assert.Error(t, err) @@ -386,12 +411,9 @@ func TestGRPCTransport_CAClientWrongCA(t *testing.T) { mockRegistry(callbacks1, ptds) mockRegistry(callbacks2, ptds) - _, err = plugin1.SendMessage(ctx, &prototk.SendMessageRequest{ - Message: &prototk.Message{ - ReplyTo: "node1", - Component: "to.you", - Node: "node2", - }, + _, err = plugin1.ActivateNode(ctx, &prototk.ActivateNodeRequest{ + NodeName: "node2", + TransportDetails: tktypes.JSONString(transportDetails2).Pretty(), }) assert.Error(t, err) @@ -417,12 +439,9 @@ func TestGRPCTransport_DirectCertVerification_WrongIssuerServer(t *testing.T) { mockRegistry(callbacks1, ptds) mockRegistry(callbacks2, ptds) - _, err := plugin1.SendMessage(ctx, &prototk.SendMessageRequest{ - Message: &prototk.Message{ - ReplyTo: "node1", - Component: "to.you", - Node: "node2", - }, + _, err := plugin1.ActivateNode(ctx, &prototk.ActivateNodeRequest{ + NodeName: "node2", + TransportDetails: tktypes.JSONString(transportDetails2).Pretty(), }) assert.Regexp(t, "PD030007", err) @@ -448,12 +467,9 @@ func TestGRPCTransport_DirectCertVerification_WrongIssuerClient(t *testing.T) { mockRegistry(callbacks1, ptds) mockRegistry(callbacks2, ptds) - _, err := plugin1.SendMessage(ctx, &prototk.SendMessageRequest{ - Message: &prototk.Message{ - ReplyTo: "node1", - Component: "to.you", - Node: "node2", - }, + _, err := plugin1.ActivateNode(ctx, &prototk.ActivateNodeRequest{ + NodeName: "node2", + TransportDetails: tktypes.JSONString(transportDetails2).Pretty(), }) 
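+ // the mismatched issuer should cause the TLS handshake - and therefore the activation - to fail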
assert.Error(t, err) @@ -476,12 +492,9 @@ func TestGRPCTransport_DirectCertVerification_BadIssuersServer(t *testing.T) { mockRegistry(callbacks1, ptds) mockRegistry(callbacks2, ptds) - _, err := plugin1.SendMessage(ctx, &prototk.SendMessageRequest{ - Message: &prototk.Message{ - ReplyTo: "node1", - Component: "to.you", - Node: "node2", - }, + _, err := plugin1.ActivateNode(ctx, &prototk.ActivateNodeRequest{ + NodeName: "node2", + TransportDetails: tktypes.JSONString(transportDetails2).Pretty(), }) assert.Regexp(t, "PD030012", err) @@ -505,12 +518,9 @@ func TestGRPCTransport_SubjectRegexpMismatch(t *testing.T) { mockRegistry(callbacks1, ptds) mockRegistry(callbacks2, ptds) - _, err := plugin1.SendMessage(ctx, &prototk.SendMessageRequest{ - Message: &prototk.Message{ - ReplyTo: "node1", - Component: "to.you", - Node: "node2", - }, + _, err := plugin1.ActivateNode(ctx, &prototk.ActivateNodeRequest{ + NodeName: "node2", + TransportDetails: tktypes.JSONString(transportDetails2).Pretty(), }) assert.Regexp(t, "PD030008", err) @@ -532,12 +542,9 @@ func TestGRPCTransport_ClientWrongNode(t *testing.T) { mockRegistry(callbacks1, ptds) mockRegistry(callbacks2, ptds) - _, err := plugin1.SendMessage(ctx, &prototk.SendMessageRequest{ - Message: &prototk.Message{ - ReplyTo: "node1", - Component: "to.you", - Node: "node3", - }, + _, err := plugin1.ActivateNode(ctx, &prototk.ActivateNodeRequest{ + NodeName: "node3", + TransportDetails: tktypes.JSONString(transportDetails2).Pretty(), }) assert.Regexp(t, "PD030011", err) @@ -563,12 +570,9 @@ func TestGRPCTransport_BadTransportDetails(t *testing.T) { ptds := map[string]*PublishedTransportDetails{"node1": transportDetails1, "node2": transportDetails2} mockRegistry(callbacks2, ptds) - _, err := plugin1.SendMessage(ctx, &prototk.SendMessageRequest{ - Message: &prototk.Message{ - ReplyTo: "node1", - Component: "to.you", - Node: "node2", - }, + _, err := plugin1.ActivateNode(ctx, &prototk.ActivateNodeRequest{ + NodeName: "node2", + TransportDetails: tktypes.JSONString(transportDetails2).Pretty(), }) assert.Regexp(t, "PD030006", err) @@ -592,12 +596,9 @@ func TestGRPCTransport_BadTransportIssuerPEM(t *testing.T) { mockRegistry(callbacks1, ptds) mockRegistry(callbacks2, ptds) - _, err := plugin1.SendMessage(ctx, &prototk.SendMessageRequest{ - Message: &prototk.Message{ - ReplyTo: "node1", - Component: "to.you", - Node: "node2", - }, + _, err := plugin1.ActivateNode(ctx, &prototk.ActivateNodeRequest{ + NodeName: "node2", + TransportDetails: tktypes.JSONString(transportDetails2).Pretty(), }) assert.Regexp(t, "PD030012", err) @@ -619,12 +620,9 @@ func TestGRPCTransport_NodeUnknownToServer(t *testing.T) { mockRegistry(callbacks1, ptds) mockRegistry(callbacks2, map[string]*PublishedTransportDetails{}) - _, err := plugin1.SendMessage(ctx, &prototk.SendMessageRequest{ - Message: &prototk.Message{ - ReplyTo: "node1", - Component: "to.you", - Node: "node2", - }, + _, err := plugin1.ActivateNode(ctx, &prototk.ActivateNodeRequest{ + NodeName: "node2", + TransportDetails: tktypes.JSONString(transportDetails2).Pretty(), }) assert.Error(t, err) @@ -642,16 +640,13 @@ func TestGRPCTransport_NodeUnknownToClient(t *testing.T) { _, transportDetails2, callbacks2, done2 := newTestGRPCTransport(t, node2Cert, node2Key, &Config{}) defer done2() - ptds := map[string]*PublishedTransportDetails{"node1": transportDetails1, "node2": transportDetails2} + ptds := map[string]*PublishedTransportDetails{"node1": transportDetails1} mockRegistry(callbacks1, ptds) mockRegistry(callbacks2, ptds) - _, err 
:= plugin1.SendMessage(ctx, &prototk.SendMessageRequest{ - Message: &prototk.Message{ - ReplyTo: "node1", - Component: "to.you", - Node: "node3", - }, + _, err := plugin1.ActivateNode(ctx, &prototk.ActivateNodeRequest{ + NodeName: "node2", + TransportDetails: tktypes.JSONString(transportDetails2).Pretty(), }) assert.Regexp(t, "not found", err) @@ -674,12 +669,9 @@ func TestGRPCTransport_ServerRejectNoCerts(t *testing.T) { mockRegistry(callbacks1, ptds) mockRegistry(callbacks2, ptds) - _, err := plugin1.SendMessage(ctx, &prototk.SendMessageRequest{ - Message: &prototk.Message{ - ReplyTo: "node1", - Component: "to.you", - Node: "node2", - }, + _, err := plugin1.ActivateNode(ctx, &prototk.ActivateNodeRequest{ + NodeName: "node2", + TransportDetails: tktypes.JSONString(transportDetails2).Pretty(), }) assert.Error(t, err) diff --git a/transports/grpc/internal/msgs/en_errors.go b/transports/grpc/internal/msgs/en_errors.go index 4f5b05eb0..1230802f7 100644 --- a/transports/grpc/internal/msgs/en_errors.go +++ b/transports/grpc/internal/msgs/en_errors.go @@ -47,4 +47,7 @@ var ( MsgConnectionToWrongNode = ffe("PD030011", "the TLS identity of the node '%s' does not match the expected node '%s'") MsgPEMCertificateInvalid = ffe("PD030012", "invalid PEM encoded x509 certificate") MsgErrorNoTargetNode = ffe("PD030013", "request to send message but no target node specified") + MsgInvalidTransportDetails = ffe("PD030014", "Invalid transport details for node '%s'") + MsgConnectionFailed = ffe("PD030015", "GRPC connection failed for endpoint '%s'") + MsgNodeNotActive = ffe("PD030016", "Send for node that is not active '%s'") ) From 6f155173a28f5f097c2d9b7b7b00319eebd76ffb Mon Sep 17 00:00:00 2001 From: Peter Broadhurst Date: Fri, 27 Dec 2024 12:15:14 -0500 Subject: [PATCH 04/41] Close out on gRPC transport changes for activate/deactivate Signed-off-by: Peter Broadhurst --- .../grpctransport/grpc_transport_test.go | 41 +++++++++++++++++++ .../internal/grpctransport/outbound_conn.go | 9 ++-- 2 files changed, 46 insertions(+), 4 deletions(-) diff --git a/transports/grpc/internal/grpctransport/grpc_transport_test.go b/transports/grpc/internal/grpctransport/grpc_transport_test.go index 0df81880c..5c2af5fce 100644 --- a/transports/grpc/internal/grpctransport/grpc_transport_test.go +++ b/transports/grpc/internal/grpctransport/grpc_transport_test.go @@ -195,6 +195,47 @@ func TestConnectFail(t *testing.T) { } +func TestSendNotActivated(t *testing.T) { + + ctx := context.Background() + + plugin1, _, done := newSuccessfulVerifiedConnection(t, func(_, callbacks2 *testCallbacks) { + callbacks2.receiveMessage = func(ctx context.Context, rmr *prototk.ReceiveMessageRequest) (*prototk.ReceiveMessageResponse, error) { + return &prototk.ReceiveMessageResponse{}, nil + } + }) + defer done() + + _, err := plugin1.SendMessage(ctx, &prototk.SendMessageRequest{ + Message: &prototk.Message{ + ReplyTo: "node1", + Component: "to.you", + Node: "node3", + }, + }) + assert.Regexp(t, "PD030016", err) + +} + +func TestActivateBadTransportDetails(t *testing.T) { + + ctx := context.Background() + + plugin1, _, done := newSuccessfulVerifiedConnection(t, func(_, callbacks2 *testCallbacks) { + callbacks2.receiveMessage = func(ctx context.Context, rmr *prototk.ReceiveMessageRequest) (*prototk.ReceiveMessageResponse, error) { + return &prototk.ReceiveMessageResponse{}, nil + } + }) + defer done() + + _, err := plugin1.ActivateNode(ctx, &prototk.ActivateNodeRequest{ + NodeName: "node2", + TransportDetails: `{"endpoint": false}`, + }) + 
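The PD030014 assertion below exercises the transport-details parse step of ActivateNode. A sketch of that validation, assuming PublishedTransportDetails carries a string endpoint (hence the `{"endpoint": false}` fixture) and that the parse failure is wrapped with the new MsgInvalidTransportDetails message; the exact code lives in grpc_transport.go and may differ:

var details PublishedTransportDetails
if err := json.Unmarshal([]byte(req.TransportDetails), &details); err != nil {
	// PD030014: Invalid transport details for node '%s'
	return nil, i18n.WrapError(ctx, err, msgs.MsgInvalidTransportDetails, req.NodeName)
}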
assert.Regexp(t, "PD030014", err) + +} + func TestConnectBadTransport(t *testing.T) { ctx := context.Background() diff --git a/transports/grpc/internal/grpctransport/outbound_conn.go b/transports/grpc/internal/grpctransport/outbound_conn.go index 30773bb2f..a4f153876 100644 --- a/transports/grpc/internal/grpctransport/outbound_conn.go +++ b/transports/grpc/internal/grpctransport/outbound_conn.go @@ -95,15 +95,16 @@ func (oc *outboundConn) ensureStream() (err error) { return err } -func (oc *outboundConn) send(message *proto.Message) (err error) { +func (oc *outboundConn) send(message *proto.Message) error { oc.sendLock.Lock() defer oc.sendLock.Unlock() - if err := oc.ensureStream(); err != nil { - return err + err := oc.ensureStream() + + if err == nil { + err = oc.stream.Send(message) } - err = oc.stream.Send(message) if err != nil { // Clean up the stream - we'll create a new one on next send _ = oc.stream.CloseSend() From 44defafd38d04ffb8d9023adf170893c7182bb5a Mon Sep 17 00:00:00 2001 From: Peter Broadhurst Date: Sat, 28 Dec 2024 08:56:31 -0500 Subject: [PATCH 05/41] State distribution message building and sender loop Signed-off-by: Peter Broadhurst --- config/pkg/pldconf/transportmgr.go | 22 +- .../000014_peer_queued_messages.down.sql | 4 +- .../000014_peer_queued_messages.up.sql | 20 +- .../000014_peer_queued_messages.down.sql | 4 +- .../sqlite/000014_peer_queued_messages.up.sql | 20 +- core/go/internal/components/privatetxmgr.go | 34 +-- core/go/internal/components/transportmgr.go | 60 +++-- core/go/internal/msgs/en_errors.go | 2 + .../privatetxnmgr/assemble_coordinator.go | 2 +- .../privatetxnmgr/ptmgrtypes/types.go | 2 +- .../privatetxnmgr/sequencer_dispatch.go | 4 +- .../state_distribution_builder.go | 6 +- .../state_distribution_builder_test.go | 2 +- .../privatetxnmgr/syncpoints/dispatch.go | 2 +- .../privatetxnmgr/syncpoints/syncpoints.go | 2 +- .../statedistribution/state_distributer.go | 18 +- .../state_distributer_test.go | 8 +- .../statedistribution/state_sender.go | 4 +- .../statedistribution/transport_client.go | 2 +- core/go/internal/transportmgr/manager.go | 74 +++--- core/go/internal/transportmgr/peer.go | 236 ++++++++++++++++-- toolkit/proto/protos/from_transport.proto | 3 +- toolkit/proto/protos/to_transport.proto | 22 +- .../internal/grpctransport/grpc_transport.go | 33 +-- .../grpctransport/grpc_transport_test.go | 68 +---- .../grpctransport/tls_verifier_test.go | 27 +- transports/grpc/internal/msgs/en_errors.go | 2 - transports/grpc/pkg/proto/paladin.proto | 4 +- 28 files changed, 434 insertions(+), 253 deletions(-) diff --git a/config/pkg/pldconf/transportmgr.go b/config/pkg/pldconf/transportmgr.go index 693164efa..d1666d7cc 100644 --- a/config/pkg/pldconf/transportmgr.go +++ b/config/pkg/pldconf/transportmgr.go @@ -17,9 +17,12 @@ package pldconf import "github.com/kaleido-io/paladin/config/pkg/confutil" type TransportManagerConfig struct { - NodeName string `json:"nodeName"` - SendQueueLen *int `json:"sendQueueLen"` - Transports map[string]*TransportConfig `json:"transports"` + NodeName string `json:"nodeName"` + SendQueueLen *int `json:"sendQueueLen"` + SendRetry RetryConfigWithMax `json:"sendRetry"` + ReliableScanRetry RetryConfig `json:"reliableScanRetry"` + ReliableMessageResend *string `json:"reliableMessageResend"` + Transports map[string]*TransportConfig `json:"transports"` } type TransportInitConfig struct { @@ -27,7 +30,18 @@ type TransportInitConfig struct { } var TransportManagerDefaults = &TransportManagerConfig{ - SendQueueLen: confutil.P(10), + 
SendQueueLen: confutil.P(10), + ReliableMessageResend: confutil.P("30s"), + ReliableScanRetry: GenericRetryDefaults.RetryConfig, + // SendRetry defaults are deliberately short + SendRetry: RetryConfigWithMax{ + RetryConfig: RetryConfig{ + InitialDelay: confutil.P("50ms"), + MaxDelay: confutil.P("1s"), + Factor: confutil.P(2.0), + }, + MaxAttempts: confutil.P(3), + }, } type TransportConfig struct { diff --git a/core/go/db/migrations/postgres/000014_peer_queued_messages.down.sql b/core/go/db/migrations/postgres/000014_peer_queued_messages.down.sql index a8ee6d412..39434162b 100644 --- a/core/go/db/migrations/postgres/000014_peer_queued_messages.down.sql +++ b/core/go/db/migrations/postgres/000014_peer_queued_messages.down.sql @@ -1,5 +1,5 @@ BEGIN; -DROP TABLE queued_msg_acks; -DROP TABLE queued_msgs; +DROP TABLE reliable_msg_acks; +DROP TABLE reliable_msgs; COMMIT; diff --git a/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql b/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql index 284d1ba10..aef4428d2 100644 --- a/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql +++ b/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql @@ -4,26 +4,24 @@ BEGIN; DROP TABLE state_distribution_acknowledgments; DROP TABLE state_distributions; -CREATE TABLE queued_msgs ( - "id" TEXT NOT NULL, +CREATE TABLE reliable_msgs ( + "id" UUID NOT NULL, "created" BIGINT NOT NULL, - "cid" TEXT , "node" TEXT NOT NULL, - "component" TEXT NOT NULL, - "reply_to" TEXT NOT NULL, "msg_type" TEXT NOT NULL, - "payload" TEXT , + "metadata" TEXT NOT NULL, PRIMARY KEY ("id") ); -CREATE INDEX queued_msgs_node ON queued_msgs ("node"); -CREATE INDEX queued_msgs_created ON queued_msgs ("created"); +CREATE INDEX reliable_msgs_node ON reliable_msgs ("node"); +CREATE INDEX reliable_msgs_created ON reliable_msgs ("created"); -CREATE TABLE queued_msg_acks ( - "id" TEXT NOT NULL, +CREATE TABLE reliable_msg_acks ( + "id" UUID NOT NULL, "time" BIGINT NOT NULL, + "error" TEXT, PRIMARY KEY ("id"), - FOREIGN KEY ("id") REFERENCES queued_msgs ("id") ON DELETE CASCADE + FOREIGN KEY ("id") REFERENCES reliable_msgs ("id") ON DELETE CASCADE ); diff --git a/core/go/db/migrations/sqlite/000014_peer_queued_messages.down.sql b/core/go/db/migrations/sqlite/000014_peer_queued_messages.down.sql index a8ee6d412..39434162b 100644 --- a/core/go/db/migrations/sqlite/000014_peer_queued_messages.down.sql +++ b/core/go/db/migrations/sqlite/000014_peer_queued_messages.down.sql @@ -1,5 +1,5 @@ BEGIN; -DROP TABLE queued_msg_acks; -DROP TABLE queued_msgs; +DROP TABLE reliable_msg_acks; +DROP TABLE reliable_msgs; COMMIT; diff --git a/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql b/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql index 284d1ba10..aef4428d2 100644 --- a/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql +++ b/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql @@ -4,26 +4,24 @@ BEGIN; DROP TABLE state_distribution_acknowledgments; DROP TABLE state_distributions; -CREATE TABLE queued_msgs ( - "id" TEXT NOT NULL, +CREATE TABLE reliable_msgs ( + "id" UUID NOT NULL, "created" BIGINT NOT NULL, - "cid" TEXT , "node" TEXT NOT NULL, - "component" TEXT NOT NULL, - "reply_to" TEXT NOT NULL, "msg_type" TEXT NOT NULL, - "payload" TEXT , + "metadata" TEXT NOT NULL, PRIMARY KEY ("id") ); -CREATE INDEX queued_msgs_node ON queued_msgs ("node"); -CREATE INDEX queued_msgs_created ON queued_msgs ("created"); +CREATE INDEX reliable_msgs_node ON 
reliable_msgs ("node"); +CREATE INDEX reliable_msgs_created ON reliable_msgs ("created"); -CREATE TABLE queued_msg_acks ( - "id" TEXT NOT NULL, +CREATE TABLE reliable_msg_acks ( + "id" UUID NOT NULL, "time" BIGINT NOT NULL, + "error" TEXT, PRIMARY KEY ("id"), - FOREIGN KEY ("id") REFERENCES queued_msgs ("id") ON DELETE CASCADE + FOREIGN KEY ("id") REFERENCES reliable_msgs ("id") ON DELETE CASCADE ); diff --git a/core/go/internal/components/privatetxmgr.go b/core/go/internal/components/privatetxmgr.go index 75e3cfd96..c0c12ab5a 100644 --- a/core/go/internal/components/privatetxmgr.go +++ b/core/go/internal/components/privatetxmgr.go @@ -20,6 +20,7 @@ import ( "github.com/google/uuid" "github.com/hyperledger/firefly-signer/pkg/abi" + "github.com/kaleido-io/paladin/toolkit/pkg/tktypes" "gorm.io/gorm" ) @@ -51,29 +52,28 @@ type PrivateTxStatus struct { FailureMessage string `json:"failureMessage,omitempty"` } -// If we had lots of these we would probably want to centralize the assignment of the constants to avoid duplication -// but currently there is only 2 ( the other being IDENTITY_RESOLVER_DESTINATION ) -const PRIVATE_TX_MANAGER_DESTINATION = "private-tx-manager" - type StateDistributionSet struct { LocalNode string SenderNode string - Remote []*StateDistribution - Local []*StateDistribution + Remote []*StateDistributionWithData + Local []*StateDistributionWithData } -// A StateDistribution is an intent to send private data for a given state to a remote party type StateDistribution struct { - ID string - StateID string - IdentityLocator string - Domain string - ContractAddress string - SchemaID string - StateDataJson string - NullifierAlgorithm *string - NullifierVerifierType *string - NullifierPayloadType *string + StateID string `json:"stateId"` + IdentityLocator string `json:"identityLocator"` + Domain string `json:"domain"` + ContractAddress string `json:"contractAddress"` + SchemaID string `json:"schemaId"` + NullifierAlgorithm *string `json:"nullifierAlgorithm"` + NullifierVerifierType *string `json:"nullifierVerifierType"` + NullifierPayloadType *string `json:"nullifierPayloadType"` +} + +// A StateDistributionWithData is an intent to send private data for a given state to a remote party +type StateDistributionWithData struct { + StateDistribution + StateData tktypes.RawJSON `json:"stateData"` } type PrivateTxManager interface { diff --git a/core/go/internal/components/transportmgr.go b/core/go/internal/components/transportmgr.go index 3d2c30d04..ddba145c9 100644 --- a/core/go/internal/components/transportmgr.go +++ b/core/go/internal/components/transportmgr.go @@ -26,29 +26,49 @@ import ( "github.com/kaleido-io/paladin/toolkit/pkg/tktypes" ) -func (tma TransportMessage) Table() string { - return "queued_msgs" +type TransportMessage struct { + MessageID uuid.UUID `json:"id"` + CorrelationID *uuid.UUID `json:"correlationId"` + Component string `json:"component"` + Node string `json:"node"` // The node id to send the message to + ReplyTo string `json:"replyTo"` // The identity to respond to on the sending node + MessageType string `json:"messageType"` + Payload []byte `json:"payload"` } -type TransportMessage struct { - MessageID uuid.UUID `json:"id" gorm:"column:id,primaryKey"` - Created tktypes.Timestamp `json:"created" gorm:"column:created,autoCreateTime:false"` // generated in our code - CorrelationID *uuid.UUID `json:"correlationId" gorm:"column:cid"` - Component string `json:"component" gorm:"column:component"` // The name of the component to route the message to once it arrives 
at the destination node - Node string `json:"node" gorm:"column:node"` // The node id to send the message to - ReplyTo string `json:"replyTo" gorm:"column:reply_to"` // The identity to respond to on the sending node - MessageType string `json:"messageType" gorm:"column:msg_type"` - Payload []byte `json:"payload" gorm:"column:payload"` - *TransportMessageAck `json:",inline" gorm:"foreignKey:pub_txn_id;references:pub_txn_id"` +type ReliableMessageType string + +const ( + RMTState ReliableMessageType = "state" + RMTReceipt ReliableMessageType = "receipt" +) + +func (t ReliableMessageType) Options() []string { + return []string{} } -func (tma TransportMessageAck) Table() string { - return "queued_msg_acks" +type ReliableMessage struct { + ID uuid.UUID `json:"id" gorm:"column:id,primaryKey"` + Created tktypes.Timestamp `json:"created" gorm:"column:created,autoCreateTime:false"` // generated in our code + Node string `json:"node" gorm:"column:node"` // The node id to send the message to + ReplyTo string `json:"replyTo" gorm:"column:reply_to"` // The identity to respond to on the sending node + MessageType tktypes.Enum[ReliableMessageType] `json:"messageType" gorm:"column:msg_type"` + Metadata tktypes.RawJSON `json:"metadata" gorm:"column:metadata"` + Ack *ReliableMessageAck `json:"ack,omitempty" gorm:"foreignKey:id;references:id;"` } -type TransportMessageAck struct { - MessageID uuid.UUID `json:"-" gorm:"column:id,primaryKey"` - AckTime *tktypes.Timestamp `json:"ackTime,omitempty" gorm:"column:time,autoCreateTime:false"` // generated in our code +func (rm ReliableMessage) Table() string { + return "reliable_msgs" +} + +type ReliableMessageAck struct { + MessageID uuid.UUID `json:"-" gorm:"column:id,primaryKey"` + Time tktypes.Timestamp `json:"time,omitempty" gorm:"column:time,autoCreateTime:false"` // generated in our code + Error string `json:"error,omitempty" gorm:"column:error,autoCreateTime:false"` +} + +func (rma ReliableMessageAck) Table() string { + return "reliable_msg_acks" } type TransportManagerToTransport interface { @@ -110,11 +130,15 @@ type TransportManager interface { // Sends a message with at-least-once delivery semantics // + // Each reliable message type has special building code in the transport manager, which assembles the full + // message by combining the metadata in the ReliableMessage with data looked up from other components. + // This avoids the performance and storage cost of writing the big data (states, receipts) multiple times. + // // The message is persisted to the DB in the supplied transaction, then sent on the wire with indefinite retry // including over node restart, until an ack is returned from the remote node. // // The pre-commit handler must be called after the DB transaction commits to trigger the delivery. 
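A sketch of the caller contract described above (transportMgr, persistence and stateDistribution are placeholders; the enum conversion assumes tktypes.Enum is the toolkit's usual string-based generic):

var preCommit func()
err := persistence.DB().Transaction(func(dbTX *gorm.DB) (err error) {
	preCommit, err = transportMgr.SendReliable(ctx, dbTX, &components.ReliableMessage{
		Node:        "node2",
		MessageType: tktypes.Enum[components.ReliableMessageType](components.RMTState),
		Metadata:    tktypes.JSONString(stateDistribution), // small metadata only; state data is looked up at send time
	})
	return err
})
if err == nil {
	preCommit() // trigger delivery only after the DB transaction has committed
}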
- SendReliable(ctx context.Context, dbTX *gorm.DB, msg *TransportMessage) (preCommit func(), err error) + SendReliable(ctx context.Context, dbTX *gorm.DB, msg *ReliableMessage) (preCommit func(), err error) // RegisterClient registers a client to receive messages from the transport manager // messages are routed to the client based on the Destination field of the message matching the value returned from Destination() function of the TransportClient diff --git a/core/go/internal/msgs/en_errors.go b/core/go/internal/msgs/en_errors.go index 215c6402f..8e6a74397 100644 --- a/core/go/internal/msgs/en_errors.go +++ b/core/go/internal/msgs/en_errors.go @@ -367,6 +367,8 @@ var ( MsgTransportClientAlreadyRegistered = ffe("PD012010", "Client '%s' already registered") MsgTransportDestinationNotFound = ffe("PD012011", "Destination '%s' not found") MsgTransportClientRegisterAfterStartup = ffe("PD012012", "Client '%s' attempted registration after startup") + MsgTransportUnsupportedReliableMsg = ffe("PD012013", "Unsupported reliable message type '%s'") + MsgTransportStateNotAvailableLocally = ffe("PD012014", "State not available locally: domain=%s,contract=%s,id=%s") // RegistryManager module PD0121XX MsgRegistryNodeEntiresNotFound = ffe("PD012100", "No entries found for node '%s'") diff --git a/core/go/internal/privatetxnmgr/assemble_coordinator.go b/core/go/internal/privatetxnmgr/assemble_coordinator.go index 89ee92b31..726690794 100644 --- a/core/go/internal/privatetxnmgr/assemble_coordinator.go +++ b/core/go/internal/privatetxnmgr/assemble_coordinator.go @@ -70,7 +70,7 @@ func NewAssembleCoordinator(ctx context.Context, nodeName string, maxPendingRequ } } -func (ac *assembleCoordinator) Complete(requestID string, stateDistributions []*components.StateDistribution) { +func (ac *assembleCoordinator) Complete(requestID string, stateDistributions []*components.StateDistributionWithData) { log.L(ac.ctx).Debugf("AssembleCoordinator:Commit %s", requestID) ac.stateDistributer.DistributeStates(ac.ctx, stateDistributions) diff --git a/core/go/internal/privatetxnmgr/ptmgrtypes/types.go b/core/go/internal/privatetxnmgr/ptmgrtypes/types.go index cabc1df12..b2f9c32c9 100644 --- a/core/go/internal/privatetxnmgr/ptmgrtypes/types.go +++ b/core/go/internal/privatetxnmgr/ptmgrtypes/types.go @@ -162,7 +162,7 @@ type AssembleCoordinator interface { Start() Stop() QueueAssemble(ctx context.Context, assemblingNode string, transactionID uuid.UUID, transactionPreAssembly *components.TransactionPreAssembly) - Complete(requestID string, stateDistributions []*components.StateDistribution) + Complete(requestID string, stateDistributions []*components.StateDistributionWithData) } type LocalAssembler interface { diff --git a/core/go/internal/privatetxnmgr/sequencer_dispatch.go b/core/go/internal/privatetxnmgr/sequencer_dispatch.go index f1e2df6ee..176e6572d 100644 --- a/core/go/internal/privatetxnmgr/sequencer_dispatch.go +++ b/core/go/internal/privatetxnmgr/sequencer_dispatch.go @@ -45,8 +45,8 @@ func (s *Sequencer) DispatchTransactions(ctx context.Context, dispatchableTransa PublicDispatches: make([]*syncpoints.PublicDispatch, 0, len(dispatchableTransactions)), } - stateDistributions := make([]*components.StateDistribution, 0) - localStateDistributions := make([]*components.StateDistribution, 0) + stateDistributions := make([]*components.StateDistributionWithData, 0) + localStateDistributions := make([]*components.StateDistributionWithData, 0) preparedTxnDistributions := make([]*preparedtxdistribution.PreparedTxnDistribution, 0) 
for signingAddress, transactionFlows := range dispatchableTransactions { diff --git a/core/go/internal/privatetxnmgr/state_distribution_builder.go b/core/go/internal/privatetxnmgr/state_distribution_builder.go index 01b0856f4..f02e98e10 100644 --- a/core/go/internal/privatetxnmgr/state_distribution_builder.go +++ b/core/go/internal/privatetxnmgr/state_distribution_builder.go @@ -32,8 +32,8 @@ func newStateDistributionBuilder(c components.AllComponents, tx *components.Priv tx: tx, StateDistributionSet: components.StateDistributionSet{ LocalNode: c.TransportManager().LocalNodeName(), - Remote: []*components.StateDistribution{}, - Local: []*components.StateDistribution{}, + Remote: []*components.StateDistributionWithData{}, + Local: []*components.StateDistributionWithData{}, }, } } @@ -78,7 +78,7 @@ func (sd *stateDistributionBuilder) processStateForDistribution(ctx context.Cont } remainingNullifiers = newRemainingNullifiers - distribution := &components.StateDistribution{ + distribution := &components.StateDistributionWithData{ ID: uuid.New().String(), IdentityLocator: recipient, Domain: tx.Domain, diff --git a/core/go/internal/privatetxnmgr/state_distribution_builder_test.go b/core/go/internal/privatetxnmgr/state_distribution_builder_test.go index 0f3570875..8b5c6953f 100644 --- a/core/go/internal/privatetxnmgr/state_distribution_builder_test.go +++ b/core/go/internal/privatetxnmgr/state_distribution_builder_test.go @@ -157,7 +157,7 @@ func TestStateDistributionWithNullifiersAllRemote(t *testing.T) { // in this example the local coordinator node isn't involved require.Empty(t, sds.Local) - checkCommon := func(s *components.StateDistribution, withNullifier bool) { + checkCommon := func(s *components.StateDistributionWithData, withNullifier bool) { if withNullifier { require.Equal(t, "nullifier_algo", *s.NullifierAlgorithm) require.Equal(t, "nullifier_verifier_type", *s.NullifierVerifierType) diff --git a/core/go/internal/privatetxnmgr/syncpoints/dispatch.go b/core/go/internal/privatetxnmgr/syncpoints/dispatch.go index a6e12e9ba..707fcac3c 100644 --- a/core/go/internal/privatetxnmgr/syncpoints/dispatch.go +++ b/core/go/internal/privatetxnmgr/syncpoints/dispatch.go @@ -60,7 +60,7 @@ type DispatchBatch struct { // PersistDispatches persists the dispatches to the database and coordinates with the public transaction manager // to submit public transactions. 
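The widened PersistDispatchBatch signature below takes the new StateDistributionWithData type. For reference, a sketch of its shape as refactored in this series, with placeholder values (the routing metadata now lives on the embedded StateDistribution, keeping the potentially large StateData separate):

sd := &components.StateDistributionWithData{
	StateDistribution: components.StateDistribution{
		StateID:         state.ID.String(),      // placeholder state
		IdentityLocator: "alice@node2",           // placeholder recipient
		Domain:          "domain1",
		ContractAddress: contractAddr.String(),
		SchemaID:        schemaID.String(),
	},
	StateData: state.Data, // tktypes.RawJSON
}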
-func (s *syncPoints) PersistDispatchBatch(dCtx components.DomainContext, contractAddress tktypes.EthAddress, dispatchBatch *DispatchBatch, stateDistributions []*components.StateDistribution, preparedTxnDistributions []*preparedtxdistribution.PreparedTxnDistribution) error { +func (s *syncPoints) PersistDispatchBatch(dCtx components.DomainContext, contractAddress tktypes.EthAddress, dispatchBatch *DispatchBatch, stateDistributions []*components.StateDistributionWithData, preparedTxnDistributions []*preparedtxdistribution.PreparedTxnDistribution) error { stateDistributionsPersisted := make([]*statedistribution.StateDistributionPersisted, 0, len(stateDistributions)) for _, stateDistribution := range stateDistributions { diff --git a/core/go/internal/privatetxnmgr/syncpoints/syncpoints.go b/core/go/internal/privatetxnmgr/syncpoints/syncpoints.go index 9d30004a1..fa508c8b0 100644 --- a/core/go/internal/privatetxnmgr/syncpoints/syncpoints.go +++ b/core/go/internal/privatetxnmgr/syncpoints/syncpoints.go @@ -51,7 +51,7 @@ type SyncPoints interface { // to the PrivateTxnManager's persistence store in the same database transaction // Although the actual persistence is offloaded to the flushwriter, this method is synchronous and will block until the // dispatch sequence is written to the database - PersistDispatchBatch(dCtx components.DomainContext, contractAddress tktypes.EthAddress, dispatchBatch *DispatchBatch, stateDistributions []*components.StateDistribution, preparedTxnDistributions []*preparedtxdistribution.PreparedTxnDistribution) error + PersistDispatchBatch(dCtx components.DomainContext, contractAddress tktypes.EthAddress, dispatchBatch *DispatchBatch, stateDistributions []*components.StateDistributionWithData, preparedTxnDistributions []*preparedtxdistribution.PreparedTxnDistribution) error // Deploy is a special case of dispatch batch, where there are no private states, so no domain context is required PersistDeployDispatchBatch(ctx context.Context, dispatchBatch *DispatchBatch) error diff --git a/core/go/internal/statedistribution/state_distributer.go b/core/go/internal/statedistribution/state_distributer.go index 3448dace6..e23f3ee5c 100644 --- a/core/go/internal/statedistribution/state_distributer.go +++ b/core/go/internal/statedistribution/state_distributer.go @@ -42,10 +42,10 @@ func NewStateDistributer( ) StateDistributer { sd := &stateDistributer{ persistence: persistence, - inputChan: make(chan *components.StateDistribution), + inputChan: make(chan *components.StateDistributionWithData), retryChan: make(chan string), acknowledgedChan: make(chan string), - pendingMap: make(map[string]*components.StateDistribution), + pendingMap: make(map[string]*components.StateDistributionWithData), stateManager: stateManager, keyManager: keyManager, transportManager: transportManager, @@ -81,8 +81,8 @@ StateDistributer is a component that is responsible for distributing state to re type StateDistributer interface { Start(ctx context.Context) error Stop(ctx context.Context) - BuildNullifiers(ctx context.Context, stateDistributions []*components.StateDistribution) ([]*components.NullifierUpsert, error) - DistributeStates(ctx context.Context, stateDistributions []*components.StateDistribution) + BuildNullifiers(ctx context.Context, stateDistributions []*components.StateDistributionWithData) ([]*components.NullifierUpsert, error) + DistributeStates(ctx context.Context, stateDistributions []*components.StateDistributionWithData) HandleStateProducedEvent(ctx context.Context, stateProducedEvent 
*pb.StateProducedEvent, distributingNode string) HandleStateAcknowledgedEvent(ctx context.Context, messagePayload []byte) } @@ -93,10 +93,10 @@ type stateDistributer struct { persistence persistence.Persistence stateManager components.StateManager keyManager components.KeyManager - inputChan chan *components.StateDistribution + inputChan chan *components.StateDistributionWithData retryChan chan string acknowledgedChan chan string - pendingMap map[string]*components.StateDistribution + pendingMap map[string]*components.StateDistributionWithData acknowledgementWriter *acknowledgementWriter receivedStateWriter *receivedStateWriter transportManager components.TransportManager @@ -151,7 +151,7 @@ func (sd *stateDistributer) Start(bgCtx context.Context) error { continue } - sd.inputChan <- &components.StateDistribution{ + sd.inputChan <- &components.StateDistributionWithData{ ID: stateDistribution.ID, StateID: stateDistribution.StateID.String(), IdentityLocator: stateDistribution.IdentityLocator, @@ -218,7 +218,7 @@ func (sd *stateDistributer) Start(bgCtx context.Context) error { return nil } -func (sd *stateDistributer) buildNullifier(ctx context.Context, krc components.KeyResolutionContextLazyDB, s *components.StateDistribution) (*components.NullifierUpsert, error) { +func (sd *stateDistributer) buildNullifier(ctx context.Context, krc components.KeyResolutionContextLazyDB, s *components.StateDistributionWithData) (*components.NullifierUpsert, error) { // We need to call the signing engine with the local identity to build the nullifier log.L(ctx).Infof("Generating nullifier for state %s on node %s (algorithm=%s,verifierType=%s,payloadType=%s)", s.StateID, sd.localNodeName, *s.NullifierAlgorithm, *s.NullifierVerifierType, *s.NullifierPayloadType) @@ -261,7 +261,7 @@ func (sd *stateDistributer) withKeyResolutionContext(ctx context.Context, fn fun return err // note we require err to be set before return } -func (sd *stateDistributer) BuildNullifiers(ctx context.Context, stateDistributions []*components.StateDistribution) (nullifiers []*components.NullifierUpsert, err error) { +func (sd *stateDistributer) BuildNullifiers(ctx context.Context, stateDistributions []*components.StateDistributionWithData) (nullifiers []*components.NullifierUpsert, err error) { nullifiers = []*components.NullifierUpsert{} err = sd.withKeyResolutionContext(ctx, func(krc components.KeyResolutionContextLazyDB) error { diff --git a/core/go/internal/statedistribution/state_distributer_test.go b/core/go/internal/statedistribution/state_distributer_test.go index c18de0ec5..95c9b9a4a 100644 --- a/core/go/internal/statedistribution/state_distributer_test.go +++ b/core/go/internal/statedistribution/state_distributer_test.go @@ -71,7 +71,7 @@ func TestBuildNullifiersNoOp(t *testing.T) { ctx, _, sd := newTestStateDistributor(t) - nullifiers, err := sd.BuildNullifiers(ctx, []*components.StateDistribution{ + nullifiers, err := sd.BuildNullifiers(ctx, []*components.StateDistributionWithData{ { ID: uuid.New().String(), StateID: "id1", @@ -105,7 +105,7 @@ func TestBuildNullifiersOk(t *testing.T) { Return(nullifierBytes, nil) stateID := tktypes.HexBytes(tktypes.RandBytes(32)) - nullifiers, err := sd.BuildNullifiers(ctx, []*components.StateDistribution{ + nullifiers, err := sd.BuildNullifiers(ctx, []*components.StateDistributionWithData{ { ID: uuid.New().String(), StateID: stateID.String(), @@ -139,7 +139,7 @@ func TestBuildNullifiersFail(t *testing.T) { Return(nil, fmt.Errorf("pop")) stateID := tktypes.HexBytes(tktypes.RandBytes(32)) - 
_, err := sd.BuildNullifiers(ctx, []*components.StateDistribution{ + _, err := sd.BuildNullifiers(ctx, []*components.StateDistributionWithData{ { ID: uuid.New().String(), StateID: stateID.String(), @@ -159,7 +159,7 @@ func TestBuildNullifiersNotLocal(t *testing.T) { ctx, _, sd := newTestStateDistributor(t) stateID := tktypes.HexBytes(tktypes.RandHex(32)) - _, err := sd.BuildNullifiers(ctx, []*components.StateDistribution{ + _, err := sd.BuildNullifiers(ctx, []*components.StateDistributionWithData{ { ID: uuid.New().String(), StateID: stateID.String(), diff --git a/core/go/internal/statedistribution/state_sender.go b/core/go/internal/statedistribution/state_sender.go index fd21598c9..a420d4ac9 100644 --- a/core/go/internal/statedistribution/state_sender.go +++ b/core/go/internal/statedistribution/state_sender.go @@ -26,14 +26,14 @@ import ( "google.golang.org/protobuf/proto" ) -func (sd *stateDistributer) DistributeStates(ctx context.Context, stateDistributions []*components.StateDistribution) { +func (sd *stateDistributer) DistributeStates(ctx context.Context, stateDistributions []*components.StateDistributionWithData) { log.L(ctx).Debugf("stateDistributer:DistributeStates %d state distributions", len(stateDistributions)) for _, stateDistribution := range stateDistributions { sd.inputChan <- stateDistribution } } -func (sd *stateDistributer) sendState(ctx context.Context, stateDistribution *components.StateDistribution) { +func (sd *stateDistributer) sendState(ctx context.Context, stateDistribution *components.StateDistributionWithData) { log.L(ctx).Debugf("stateDistributer:sendState id=%s,domain=%s contractAddress=%s schemaId=%s stateId=%s identity=%s, nullifierAlgorithm=%v nullifierVerifierType=%v nullifierPayloadType=%v]", stateDistribution.ID, stateDistribution.Domain, diff --git a/core/go/internal/statedistribution/transport_client.go b/core/go/internal/statedistribution/transport_client.go index d0221cc3a..41fbdd268 100644 --- a/core/go/internal/statedistribution/transport_client.go +++ b/core/go/internal/statedistribution/transport_client.go @@ -29,7 +29,7 @@ func (sd *stateDistributer) HandleStateProducedEvent(ctx context.Context, stateP log.L(ctx).Debugf("stateDistributer:handleStateProducedEvent") var err error - s := &components.StateDistribution{ + s := &components.StateDistributionWithData{ ID: stateProducedEvent.DistributionId, StateID: stateProducedEvent.StateId, IdentityLocator: stateProducedEvent.Party, diff --git a/core/go/internal/transportmgr/manager.go b/core/go/internal/transportmgr/manager.go index fe09a4c2e..b6ed42348 100644 --- a/core/go/internal/transportmgr/manager.go +++ b/core/go/internal/transportmgr/manager.go @@ -18,6 +18,7 @@ package transportmgr import ( "context" "sync" + "time" "github.com/google/uuid" "github.com/hyperledger/firefly-common/pkg/i18n" @@ -25,10 +26,12 @@ import ( "github.com/kaleido-io/paladin/config/pkg/pldconf" "github.com/kaleido-io/paladin/core/internal/components" "github.com/kaleido-io/paladin/core/internal/msgs" + "github.com/kaleido-io/paladin/core/pkg/persistence" "gorm.io/gorm" "github.com/kaleido-io/paladin/toolkit/pkg/log" "github.com/kaleido-io/paladin/toolkit/pkg/plugintk" + "github.com/kaleido-io/paladin/toolkit/pkg/retry" "github.com/kaleido-io/paladin/toolkit/pkg/rpcserver" "github.com/kaleido-io/paladin/toolkit/pkg/tktypes" ) @@ -41,6 +44,8 @@ type transportManager struct { conf *pldconf.TransportManagerConfig localNodeName string registryManager components.RegistryManager + stateManager components.StateManager + 
persistence persistence.Persistence transportsByID map[uuid.UUID]*transport transportsByName map[string]*transport @@ -52,18 +57,25 @@ type transportManager struct { peersLock sync.RWMutex peers map[string]*peer - senderBufferLen int + sendShortRetry *retry.Retry + reliableScanRetry *retry.Retry + + senderBufferLen int + reliableMessageResend time.Duration } func NewTransportManager(bgCtx context.Context, conf *pldconf.TransportManagerConfig) components.TransportManager { return &transportManager{ - bgCtx: bgCtx, - conf: conf, - localNodeName: conf.NodeName, - transportsByID: make(map[uuid.UUID]*transport), - transportsByName: make(map[string]*transport), - destinations: make(map[string]components.TransportClient), - senderBufferLen: confutil.IntMin(conf.SendQueueLen, 0, *pldconf.TransportManagerDefaults.SendQueueLen), + bgCtx: bgCtx, + conf: conf, + localNodeName: conf.NodeName, + transportsByID: make(map[uuid.UUID]*transport), + transportsByName: make(map[string]*transport), + destinations: make(map[string]components.TransportClient), + senderBufferLen: confutil.IntMin(conf.SendQueueLen, 0, *pldconf.TransportManagerDefaults.SendQueueLen), + reliableMessageResend: confutil.DurationMin(conf.ReliableMessageResend, 100*time.Millisecond, *pldconf.TransportManagerDefaults.ReliableMessageResend), + sendShortRetry: retry.NewRetryLimited(&conf.SendRetry, &pldconf.TransportManagerDefaults.SendRetry), + reliableScanRetry: retry.NewRetryIndefinite(&conf.ReliableScanRetry, &pldconf.TransportManagerDefaults.ReliableScanRetry), } } @@ -82,6 +94,8 @@ func (tm *transportManager) PostInit(c components.AllComponents) error { // plugin manager starts, and thus before any domain would have started any go-routine // that could have cached a nil value in memory. tm.registryManager = c.RegistryManager() + tm.stateManager = c.StateManager() + tm.persistence = c.Persistence() return nil } @@ -199,15 +213,16 @@ func (tm *transportManager) LocalNodeName() string { return tm.localNodeName } -func (tm *transportManager) prepareNewMessage(ctx context.Context, msg *components.TransportMessage) (*peer, error) { - msg.Created = tktypes.TimestampNow() +// See docs in components package +func (tm *transportManager) Send(ctx context.Context, msg *components.TransportMessage) error { + msg.MessageID = uuid.New() // Check the message is valid if len(msg.MessageType) == 0 || len(msg.Payload) == 0 { log.L(ctx).Errorf("Invalid message send request %+v", msg) - return nil, i18n.NewError(ctx, msgs.MsgTransportInvalidMessage) + return i18n.NewError(ctx, msgs.MsgTransportInvalidMessage) } if msg.ReplyTo == "" { @@ -216,17 +231,6 @@ func (tm *transportManager) prepareNewMessage(ctx context.Context, msg *componen // Use or establish a peer connection for the send peer, err := tm.getPeer(ctx, msg.Node) - if err != nil { - return nil, err - } - - return peer, nil -} - -// See docs in components package -func (tm *transportManager) Send(ctx context.Context, msg *components.TransportMessage) error { - - peer, err := tm.prepareNewMessage(ctx, msg) if err != nil { return err } @@ -246,21 +250,29 @@ func (tm *transportManager) Send(ctx context.Context, msg *components.TransportM } // See docs in components package -func (tm *transportManager) SendReliable(ctx context.Context, dbTX *gorm.DB, msg *components.TransportMessage) (preCommit func(), err error) { +func (tm *transportManager) SendReliable(ctx context.Context, dbTX *gorm.DB, msg *components.ReliableMessage) (preCommit func(), err error) { - peer, err := tm.prepareNewMessage(ctx, msg) 
- if err != nil { - return nil, err + var p *peer + + msg.ID = uuid.New() + msg.Created = tktypes.TimestampNow() + _, err = msg.MessageType.Validate() + + if err == nil { + p, err = tm.getPeer(ctx, msg.Node) + } + + if err == nil { + err = dbTX. + WithContext(ctx). + Create(msg). + Error } - err = dbTX. - WithContext(ctx). - Create(msg). - Error if err != nil { return nil, err } - return peer.notifyPersistedMsgAvailable, nil + return p.notifyPersistedMsgAvailable, nil } diff --git a/core/go/internal/transportmgr/peer.go b/core/go/internal/transportmgr/peer.go index da98b513a..5009feb12 100644 --- a/core/go/internal/transportmgr/peer.go +++ b/core/go/internal/transportmgr/peer.go @@ -18,7 +18,9 @@ package transportmgr import ( "cmp" "context" + "encoding/json" "sort" + "time" "github.com/hyperledger/firefly-common/pkg/i18n" "github.com/kaleido-io/paladin/config/pkg/confutil" @@ -26,6 +28,8 @@ import ( "github.com/kaleido-io/paladin/core/internal/msgs" "github.com/kaleido-io/paladin/toolkit/pkg/log" "github.com/kaleido-io/paladin/toolkit/pkg/prototk" + "github.com/kaleido-io/paladin/toolkit/pkg/tktypes" + "gorm.io/gorm/clause" ) type peer struct { @@ -34,11 +38,16 @@ type peer struct { name string tm *transportManager - transport *transport + transport *transport // the transport mutually supported by us and the remote node + peerInfo map[string]any // opaque JSON object from the transport persistedMsgsAvailable chan struct{} sendQueue chan *components.TransportMessage + // Send loop state (no lock as only used on the loop) + lastFullScan time.Time + lastDrainHWM *tktypes.Timestamp + done chan struct{} } @@ -113,8 +122,10 @@ func (tm *transportManager) getPeer(ctx context.Context, nodeName string) (*peer // See if any of the transports registered by the node, are configured on this local node // Note: We just pick the first one if multiple are available, and there is no retry to // fallback to a secondary one currently. 
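// Note on the loop below: as written it assigns on every iteration without
// checking the map lookup, so the *last* registered transport wins, and a
// registered name that is not configured locally can overwrite an earlier
// match with nil. A guarded form matching the "pick the first" comment
// above would be:
//
//	for _, rtd := range registeredTransportDetails {
//		if t, ok := tm.transportsByName[rtd.Transport]; ok {
//			p.transport = t
//			remoteTransportDetails = rtd.Details
//			break
//		}
//	}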
+ var remoteTransportDetails string for _, rtd := range registeredTransportDetails { p.transport = tm.transportsByName[rtd.Transport] + remoteTransportDetails = rtd.Details } if p.transport == nil { // If we didn't find one, then feedback to the caller which transports were registered @@ -125,6 +136,18 @@ func (tm *transportManager) getPeer(ctx context.Context, nodeName string) (*peer return nil, i18n.NewError(p.ctx, msgs.MsgTransportNoTransportsConfiguredForNode, nodeName, registeredTransportNames) } + // Activate the connection (the deactivate is deferred to the send loop) + res, err := p.transport.api.ActivateNode(ctx, &prototk.ActivateNodeRequest{ + NodeName: nodeName, + TransportDetails: remoteTransportDetails, + }) + if err == nil { + err = json.Unmarshal([]byte(res.PeerInfoJson), &p.peerInfo) + } + if err != nil { + return nil, err + } + log.L(ctx).Debugf("connected to peer '%s'", nodeName) tm.peers[nodeName] = p return p, nil @@ -137,14 +160,13 @@ func (p *peer) notifyPersistedMsgAvailable() { } } -func (p *peer) send(ctx context.Context, msg *components.TransportMessage) error { - +func (p *peer) mapMsg(msg *components.TransportMessage) *prototk.Message { // Convert the message to the protobuf transport payload var correlID *string if msg.CorrelationID != nil { correlID = confutil.P(msg.CorrelationID.String()) } - pMsg := &prototk.Message{ + return &prototk.Message{ MessageType: msg.MessageType, MessageId: msg.MessageID.String(), CorrelationId: correlID, @@ -153,32 +175,208 @@ func (p *peer) send(ctx context.Context, msg *components.TransportMessage) error ReplyTo: msg.ReplyTo, Payload: msg.Payload, } - return p.transport.send(ctx, pMsg) +} + +func (p *peer) stateDistributionMsg(rm *components.ReliableMessage, targetNode string, sd *components.StateDistributionWithData) *prototk.Message { + payload, _ := json.Marshal(sd) + return &prototk.Message{ + MessageType: "StateProducedEvent", + Payload: payload, + Node: targetNode, + Component: components.PRIVATE_TX_MANAGER_DESTINATION, + ReplyTo: p.tm.localNodeName, + } } -func (p *peer) sender() { - defer close(p.done) +func (p *peer) send(msg *prototk.Message) error { + return p.tm.sendShortRetry.Do(p.ctx, func(attempt int) (retryable bool, err error) { + return true, p.transport.send(p.ctx, msg) + }) +} - log.L(p.ctx).Infof("peer %s active", p.name) +func (p *peer) senderDone() { + log.L(p.ctx).Infof("peer %s deactivating", p.name) + if _, err := p.transport.api.DeactivateNode(p.ctx, &prototk.DeactivateNodeRequest{ + NodeName: p.name, + }); err != nil { + log.L(p.ctx).Warnf("peer %s returned deactivation error: %s", p.name, err) + } + close(p.done) +} + +func (p *peer) reliableMessageScan() error { + + checkNew := true + fullScan := p.lastDrainHWM == nil || time.Since(p.lastFullScan) >= p.tm.reliableMessageResend + select { + case <-p.persistedMsgsAvailable: + checkNew = true + default: + } + + if !fullScan && !checkNew { + return nil // Nothing to do + } - var persistedStale bool - var persistedPage []*components.TransportMessage + const pageSize = 100 + + var total = 0 + var lastPageEnd *tktypes.Timestamp for { + query := p.tm.persistence.DB(). + WithContext(p.ctx). + Order("created ASC"). + Joins("Ack"). + Where(`"Ack"."time" IS NULL`). 
+ Limit(pageSize) + if lastPageEnd != nil { + query = query.Where("created > ?", *lastPageEnd) + } else if !fullScan { + query = query.Where("created > ?", *p.lastDrainHWM) + } + + var page []*components.ReliableMessage + err := query.Find(&page).Error + if err != nil { + return err + } + + // Process the page - building and sending the proto messages + if err = p.processReliableMsgPage(page); err != nil { + // Errors returned are retryable - for data errors the function + // must record those as acks with an error. + return err + } + + if len(page) > 0 { + total += len(page) + lastPageEnd = &page[len(page)-1].Created + } + + // If we didn't have a full page, then we're done + if len(page) < pageSize { + break + } + + } - var nextMessage *components.TransportMessage + log.L(p.ctx).Debugf("reliableMessageScan fullScan=%t total=%d lastPageEnd=%v", fullScan, total, lastPageEnd) - if len(persistedPage) > 0 { + // If we found anything, then mark that as our high water mark for + // future scans. If an empty full scan - then we store nil + if lastPageEnd != nil || fullScan { + p.lastDrainHWM = lastPageEnd + } + + // Record the last full scan + if fullScan { + p.lastFullScan = time.Now() + } + + return nil +} + +func (p *peer) buildStateDistributionMsg(rm *components.ReliableMessage) (*prototk.Message, error, error) { + + // Validate the message first (not retryable) + var sd components.StateDistributionWithData + var stateID tktypes.HexBytes + var contractAddr *tktypes.EthAddress + parseErr := json.Unmarshal(rm.Metadata, &sd) + if parseErr == nil { + stateID, parseErr = tktypes.ParseHexBytes(p.ctx, sd.StateID) + } + if parseErr == nil { + contractAddr, parseErr = tktypes.ParseEthAddress(sd.ContractAddress) + } + if parseErr != nil { + return nil, parseErr, nil + } + + // Get the state - distinguishing between not found, vs. a retryable error + state, err := p.tm.stateManager.GetState(p.ctx, p.tm.persistence.DB(), sd.Domain, *contractAddr, stateID, false, false) + if err != nil { + return nil, nil, err + } + if state == nil { + return nil, + i18n.NewError(p.ctx, msgs.MsgTransportStateNotAvailableLocally, sd.Domain, *contractAddr, stateID), + nil + } + + return nil, nil, nil +} + +func (p *peer) processReliableMsgPage(page []*components.ReliableMessage) (err error) { + // Build the messages + msgsToSend := make([]*prototk.Message, 0, len(page)) + var errorAcks []*components.ReliableMessageAck + for _, rm := range page { + var msg *prototk.Message + var errorAck error + switch rm.MessageType.V() { + case components.RMTState: + msg, errorAck, err = p.buildStateDistributionMsg(rm) + case components.RMTReceipt: + // TODO: Implement for receipt distribution + fallthrough + default: + errorAck = i18n.NewError(p.ctx, msgs.MsgTransportUnsupportedReliableMsg, rm.MessageType) } + switch { + case err != nil: + return err + case errorAck != nil: + errorAcks = append(errorAcks, &components.ReliableMessageAck{ + MessageID: rm.ID, + Time: tktypes.TimestampNow(), + Error: errorAck.Error(), + }) + case msg != nil: + msgsToSend = append(msgsToSend, msg) + } + } + + // Persist any bad message failures + if len(errorAcks) > 0 { + err := p.tm.persistence.DB(). + WithContext(p.ctx). + Clauses(clause.OnConflict{DoNothing: true}). + Create(errorAcks). + Error + if err != nil { + return err + } + } + + // Send the messages, with short retry. + // We fail the whole page on error, so we don't thrash (the outer infinite retry + // gives a much longer maximum back-off). 
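// Sketch of the timing with the defaults introduced earlier in this series
// (illustrative, not normative): sendShortRetry makes up to 3 attempts with
// ~50ms and ~100ms back-offs, so an unreachable peer fails the page within
// roughly 150ms; the indefinite reliableScanRetry then re-drives the whole
// scan with its own back-off, and the 30s ReliableMessageResend full scan
// re-sends anything still unacked (for example if an ack was lost).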
+ for _, msg := range msgsToSend { + if err := p.send(msg); err != nil { + return err + } + } + + return nil + +} + +func (p *peer) sender() { + defer p.senderDone() + + log.L(p.ctx).Infof("peer %s active", p.name) + + for { - select { - case <-p.ctx.Done(): - log.L(p.ctx).Infof("peer %s inactive", p.name) - return - case <-p.persistedMsgsAvailable: - persistedStale = true - case nextMessage = <-p.sendQueue: + // We send/resend any reliable messages queued up first + err := p.tm.reliableScanRetry.Do(p.ctx, func(attempt int) (retryable bool, err error) { + return true, p.reliableMessageScan() + }) + if err != nil { + return // context closed } } } diff --git a/toolkit/proto/protos/from_transport.proto b/toolkit/proto/protos/from_transport.proto index 3e3cf0116..aaf799cd3 100644 --- a/toolkit/proto/protos/from_transport.proto +++ b/toolkit/proto/protos/from_transport.proto @@ -20,7 +20,8 @@ package io.kaleido.paladin.toolkit; import "to_transport.proto"; message ReceiveMessageRequest { - Message message = 1; + string node = 1; + PaladinMsg message = 2; } message ReceiveMessageResponse { diff --git a/toolkit/proto/protos/to_transport.proto b/toolkit/proto/protos/to_transport.proto index 0aa00d3ab..116a7ee32 100644 --- a/toolkit/proto/protos/to_transport.proto +++ b/toolkit/proto/protos/to_transport.proto @@ -26,7 +26,8 @@ message ConfigureTransportResponse { } message SendMessageRequest { - Message message = 1; + string node = 1; + PaladinMsg message = 2; } message SendMessageResponse { @@ -55,13 +56,14 @@ message GetLocalDetailsResponse { string transport_details = 1; // local transport details that can be shared via registry with other parties } -message Message { - string message_id = 1; - optional string correlation_id = 2; - string component = 3; // component name to route message to once it arrives at the destination node - string node = 4; // id of the node to send to - string reply_to = 5; // id of the node to reply to - string message_type =65; - bytes payload = 7; - +message PaladinMsg { + enum Component { + TRANSACTION_ENGINE = 0; + RELIABLE_MESSAGE_HANDLER = 1; + } + string message_id = 1; // UUID individually allocated to each message + optional string correlation_id = 2; // optional correlation ID to relate "replies" back to original message IDs + Component component = 3; // components are allocated here + string message_type = 4; // message types are managed within each component + bytes payload = 5; // arbitrary payload } \ No newline at end of file diff --git a/transports/grpc/internal/grpctransport/grpc_transport.go b/transports/grpc/internal/grpctransport/grpc_transport.go index c345adf8a..368f2b823 100644 --- a/transports/grpc/internal/grpctransport/grpc_transport.go +++ b/transports/grpc/internal/grpctransport/grpc_transport.go @@ -185,24 +185,16 @@ func (t *grpcTransport) ConnectSendStream(stream grpc.ClientStreamingServer[prot return err } - log.L(ctx).Infof("GRPC received message id=%s cid=%v component=%s messageType=%s replyTo=%s from peer %s", - msg.MessageId, msg.CorrelationId, msg.Component, msg.MessageType, msg.ReplyTo, ai.verifiedNodeName) - - // Check the message is from the node we expect. - // Note the destination node is checked by Paladin - just just have to verify the sender. 
- if msg.ReplyTo != ai.verifiedNodeName { - log.L(ctx).Errorf("Invalid replyTo: %s", msg.ReplyTo) - return i18n.NewError(ctx, msgs.MsgInvalidReplyToNode) - } + log.L(ctx).Infof("GRPC received message id=%s cid=%v component=%d messageType=%s from peer %s", + msg.MessageId, msg.CorrelationId, msg.Component, msg.MessageType, ai.verifiedNodeName) // Deliver it to Paladin _, err = t.callbacks.ReceiveMessage(ctx, &prototk.ReceiveMessageRequest{ - Message: &prototk.Message{ + Node: ai.verifiedNodeName, + Message: &prototk.PaladinMsg{ MessageId: msg.MessageId, CorrelationId: msg.CorrelationId, - Component: msg.Component, - Node: msg.Node, - ReplyTo: msg.ReplyTo, + Component: prototk.PaladinMsg_Component(msg.Component), MessageType: msg.MessageType, Payload: msg.Payload, }, @@ -277,22 +269,17 @@ func (t *grpcTransport) getConnection(nodeName string) *outboundConn { func (t *grpcTransport) SendMessage(ctx context.Context, req *prototk.SendMessageRequest) (*prototk.SendMessageResponse, error) { msg := req.Message - if req.Message.Node == "" { - return nil, i18n.NewError(ctx, msgs.MsgErrorNoTargetNode) - } - oc := t.getConnection(msg.Node) + oc := t.getConnection(req.Node) if oc == nil { // This is an error in the Paladin layer - return nil, i18n.NewError(ctx, msgs.MsgNodeNotActive, msg.Node) + return nil, i18n.NewError(ctx, msgs.MsgNodeNotActive, req.Node) } - log.L(ctx).Infof("GRPC sending message id=%s cid=%v component=%s messageType=%s replyTo=%s to peer %s", - msg.MessageId, msg.CorrelationId, msg.Component, msg.MessageType, msg.ReplyTo, msg.Node) + log.L(ctx).Infof("GRPC sending message id=%s cid=%v component=%s messageType=%s to peer %s", + msg.MessageId, msg.CorrelationId, msg.Component, msg.MessageType, req.Node) err := oc.send(&proto.Message{ MessageId: msg.MessageId, CorrelationId: msg.CorrelationId, - Component: msg.Component, - Node: msg.Node, - ReplyTo: msg.ReplyTo, + Component: int32(msg.Component), MessageType: msg.MessageType, Payload: msg.Payload, }) diff --git a/transports/grpc/internal/grpctransport/grpc_transport_test.go b/transports/grpc/internal/grpctransport/grpc_transport_test.go index 5c2af5fce..3fed123ba 100644 --- a/transports/grpc/internal/grpctransport/grpc_transport_test.go +++ b/transports/grpc/internal/grpctransport/grpc_transport_test.go @@ -134,32 +134,9 @@ func TestReceiveFail(t *testing.T) { var err error for err == nil { _, err = plugin1.SendMessage(ctx, &prototk.SendMessageRequest{ - Message: &prototk.Message{ - ReplyTo: "node1", - Component: "to.you", - Node: "node2", - }, - }) - } - assert.Error(t, err) - -} - -func TestBadReplyTo(t *testing.T) { - - ctx := context.Background() - - plugin1, _, done := newSuccessfulVerifiedConnection(t) - defer done() - - // Send and we should get an error as the server fails - var err error - for err == nil { - _, err = plugin1.SendMessage(ctx, &prototk.SendMessageRequest{ - Message: &prototk.Message{ - ReplyTo: "not.mine", - Component: "to.you", - Node: "node2", + Node: "node2", + Message: &prototk.PaladinMsg{ + Component: prototk.PaladinMsg_TRANSACTION_ENGINE, }, }) } @@ -184,10 +161,9 @@ func TestConnectFail(t *testing.T) { var err error for err == nil { _, err = plugin1.SendMessage(ctx, &prototk.SendMessageRequest{ - Message: &prototk.Message{ - ReplyTo: "node1", - Component: "to.you", - Node: "node2", + Node: "node2", + Message: &prototk.PaladinMsg{ + Component: prototk.PaladinMsg_TRANSACTION_ENGINE, }, }) } @@ -207,10 +183,9 @@ func TestSendNotActivated(t *testing.T) { defer done() _, err := plugin1.SendMessage(ctx, 
&prototk.SendMessageRequest{ - Message: &prototk.Message{ - ReplyTo: "node1", - Component: "to.you", - Node: "node3", + Node: "node3", + Message: &prototk.PaladinMsg{ + Component: prototk.PaladinMsg_TRANSACTION_ENGINE, }, }) assert.Regexp(t, "PD030016", err) @@ -251,27 +226,6 @@ func TestConnectBadTransport(t *testing.T) { } -func TestSendNoNode(t *testing.T) { - - ctx := context.Background() - - plugin1, _, done := newSuccessfulVerifiedConnection(t, func(_, callbacks2 *testCallbacks) { - callbacks2.receiveMessage = func(ctx context.Context, rmr *prototk.ReceiveMessageRequest) (*prototk.ReceiveMessageResponse, error) { - return &prototk.ReceiveMessageResponse{}, nil - } - }) - defer done() - - _, err := plugin1.SendMessage(ctx, &prototk.SendMessageRequest{ - Message: &prototk.Message{ - ReplyTo: "node1", - Component: "someComponent", - }, - }) - assert.Regexp(t, "PD030013", err) - -} - func TestConnectSendStreamBadSecurityCtx(t *testing.T) { plugin, _, _, done := newTestGRPCTransport(t, "", "", &Config{}) @@ -298,9 +252,7 @@ func TestConnectSendStreamBadSecurityCtx(t *testing.T) { for err == nil { err = s.Send(&proto.Message{ - ReplyTo: "not.mine", - Component: "to.you", - Node: "node2", + Component: int32(prototk.PaladinMsg_TRANSACTION_ENGINE), }) } assert.Error(t, err) diff --git a/transports/grpc/internal/grpctransport/tls_verifier_test.go b/transports/grpc/internal/grpctransport/tls_verifier_test.go index 6d7a9a8cc..3bf4fca1d 100644 --- a/transports/grpc/internal/grpctransport/tls_verifier_test.go +++ b/transports/grpc/internal/grpctransport/tls_verifier_test.go @@ -208,7 +208,7 @@ func testActivateNode(t *testing.T, sender *grpcTransport, remoteNodeName string func TestGRPCTransport_DirectCertVerification_OK(t *testing.T) { ctx := context.Background() - received := make(chan *prototk.Message) + received := make(chan *prototk.PaladinMsg) plugin1, _, done := newSuccessfulVerifiedConnection(t, func(_, callbacks2 *testCallbacks) { callbacks2.receiveMessage = func(ctx context.Context, rmr *prototk.ReceiveMessageRequest) (*prototk.ReceiveMessageResponse, error) { received <- rmr.Message @@ -219,10 +219,9 @@ func TestGRPCTransport_DirectCertVerification_OK(t *testing.T) { // Connect and send from plugin1 to plugin2 sendRes, err := plugin1.SendMessage(ctx, &prototk.SendMessageRequest{ - Message: &prototk.Message{ - ReplyTo: "node1", - Component: "to.you", - Node: "node2", + Node: "node2", + Message: &prototk.PaladinMsg{ + Component: prototk.PaladinMsg_TRANSACTION_ENGINE, }, }) assert.NoError(t, err) @@ -244,7 +243,7 @@ func TestGRPCTransport_DirectCertVerification_OK(t *testing.T) { func TestGRPCTransport_DirectCertVerificationWithKeyRotation_OK(t *testing.T) { ctx := context.Background() - received := make(chan *prototk.Message) + received := make(chan *prototk.PaladinMsg) // the default config is direct cert verification node1Cert, node1Key := buildTestCertificate(t, pkix.Name{CommonName: "node1"}, nil, nil) @@ -273,10 +272,9 @@ func TestGRPCTransport_DirectCertVerificationWithKeyRotation_OK(t *testing.T) { deactivate := testActivateNode(t, plugin1, "node2", transportDetails2) defer deactivate() sendRes, err := plugin1.SendMessage(ctx, &prototk.SendMessageRequest{ - Message: &prototk.Message{ - ReplyTo: "node1", - Component: "to.you", - Node: "node2", + Node: "node2", + Message: &prototk.PaladinMsg{ + Component: prototk.PaladinMsg_TRANSACTION_ENGINE, }, }) assert.NoError(t, err) @@ -315,7 +313,7 @@ func TestGRPCTransport_CACertVerificationWithSubjectRegex_OK(t *testing.T) { defer done2() 
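The sends below use the reshaped wire message. For reference, a sketch of building the new PaladinMsg introduced in to_transport.proto in this patch, with routing now by component enum plus a per-component message type (payload contents hypothetical):

msg := &prototk.PaladinMsg{
	MessageId:   uuid.New().String(),
	Component:   prototk.PaladinMsg_TRANSACTION_ENGINE,
	MessageType: "example",
	Payload:     []byte(`{"hello":"world"}`),
}
_, err := plugin1.SendMessage(ctx, &prototk.SendMessageRequest{
	Node:    "node2",
	Message: msg,
})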
transportDetails1.Issuers = "" // to ensure we're not falling back to cert verification - received := make(chan *prototk.Message) + received := make(chan *prototk.PaladinMsg) callbacks2.receiveMessage = func(ctx context.Context, rmr *prototk.ReceiveMessageRequest) (*prototk.ReceiveMessageResponse, error) { received <- rmr.Message return &prototk.ReceiveMessageResponse{}, nil @@ -330,10 +328,9 @@ func TestGRPCTransport_CACertVerificationWithSubjectRegex_OK(t *testing.T) { deactivate := testActivateNode(t, plugin1, "node2", transportDetails2) defer deactivate() sendRes, err := plugin1.SendMessage(ctx, &prototk.SendMessageRequest{ - Message: &prototk.Message{ - ReplyTo: "node1", - Component: "to.you", - Node: "node2", + Node: "node2", + Message: &prototk.PaladinMsg{ + Component: prototk.PaladinMsg_TRANSACTION_ENGINE, }, }) assert.NoError(t, err) diff --git a/transports/grpc/internal/msgs/en_errors.go b/transports/grpc/internal/msgs/en_errors.go index 1230802f7..805110a1e 100644 --- a/transports/grpc/internal/msgs/en_errors.go +++ b/transports/grpc/internal/msgs/en_errors.go @@ -43,10 +43,8 @@ var ( MsgPeerCertificateIssuerInvalid = ffe("PD030007", "peer '%s' did not provide a certificate signed an expected issuer received=%s issuers=%v") MsgTLSNegotiationFailed = ffe("PD030008", "TLS negotiation did not result in a verified peer node name") MsgAuthContextNotAvailable = ffe("PD030009", "server failed to retrieve the auth context") - MsgInvalidReplyToNode = ffe("PD030010", "replyTo node does not match sending node") MsgConnectionToWrongNode = ffe("PD030011", "the TLS identity of the node '%s' does not match the expected node '%s'") MsgPEMCertificateInvalid = ffe("PD030012", "invalid PEM encoded x509 certificate") - MsgErrorNoTargetNode = ffe("PD030013", "request to send message but no target node specified") MsgInvalidTransportDetails = ffe("PD030014", "Invalid transport details for node '%s'") MsgConnectionFailed = ffe("PD030015", "GRPC connection failed for endpoint '%s'") MsgNodeNotActive = ffe("PD030016", "Send for node that is not active '%s'") diff --git a/transports/grpc/pkg/proto/paladin.proto b/transports/grpc/pkg/proto/paladin.proto index f30c031d2..aa3f0f549 100644 --- a/transports/grpc/pkg/proto/paladin.proto +++ b/transports/grpc/pkg/proto/paladin.proto @@ -27,9 +27,7 @@ message Empty {} message Message { string message_id = 1; optional string correlation_id = 2; - string node = 3; - string component = 4; - string reply_to = 5; + int32 component = 4; string message_type = 6; bytes payload = 7; } \ No newline at end of file From 1725f86c4357a777d95e487befe19434181baa09 Mon Sep 17 00:00:00 2001 From: Peter Broadhurst Date: Sun, 29 Dec 2024 14:56:18 -0500 Subject: [PATCH 06/41] Working through refactor of transport interface Signed-off-by: Peter Broadhurst --- core/go/build.gradle | 6 - core/go/internal/components/privatetxmgr.go | 1 + core/go/internal/components/transportmgr.go | 22 +- .../identityresolver/transport_client.go | 2 +- core/go/internal/msgs/en_errors.go | 1 - .../transport_client.go | 4 +- .../privatetxnmgr/assemble_coordinator.go | 5 +- .../privatetxnmgr/nullifier_distribution.go | 91 ++++++ .../internal/privatetxnmgr/private_txn_mgr.go | 15 +- .../privatetxnmgr/private_txn_mgr_test.go | 14 +- core/go/internal/privatetxnmgr/sequencer.go | 5 - .../internal/privatetxnmgr/sequencer_test.go | 3 - .../privatetxnmgr/syncpoints/dispatch.go | 14 - .../privatetxnmgr/transaction_flow_test.go | 3 - .../privatetxnmgr/transport_receiver.go | 2 +- .../acknowledgment_writer.go | 94 
------ .../received_state_writer.go | 129 -------- .../statedistribution/state_distributer.go | 290 ------------------ .../state_distributer_test.go | 175 ----------- .../statedistribution/state_receiver.go | 55 ---- .../statedistribution/state_sender.go | 90 ------ .../statedistribution/transport_client.go | 100 ------ core/go/internal/transportmgr/manager.go | 38 ++- core/go/internal/transportmgr/peer.go | 42 +-- core/go/internal/transportmgr/transport.go | 47 +-- .../internal/transportmgr/transport_test.go | 74 ++--- core/go/pkg/testbed/testbed.go | 2 +- core/go/pkg/testbed/testbed_engine_stubs.go | 13 +- toolkit/go/pkg/tktypes/rawjson_test.go | 2 +- toolkit/go/pkg/tktypes/string_validation.go | 7 + .../go/pkg/tktypes/string_validation_test.go | 8 + toolkit/proto/protos/from_transport.proto | 3 +- .../internal/grpctransport/grpc_transport.go | 1 - 33 files changed, 201 insertions(+), 1157 deletions(-) create mode 100644 core/go/internal/privatetxnmgr/nullifier_distribution.go delete mode 100644 core/go/internal/statedistribution/acknowledgment_writer.go delete mode 100644 core/go/internal/statedistribution/received_state_writer.go delete mode 100644 core/go/internal/statedistribution/state_distributer.go delete mode 100644 core/go/internal/statedistribution/state_distributer_test.go delete mode 100644 core/go/internal/statedistribution/state_receiver.go delete mode 100644 core/go/internal/statedistribution/state_sender.go delete mode 100644 core/go/internal/statedistribution/transport_client.go diff --git a/core/go/build.gradle b/core/go/build.gradle index b6c21292f..545e31283 100644 --- a/core/go/build.gradle +++ b/core/go/build.gradle @@ -230,12 +230,6 @@ task makeMocks(type: Mockery, dependsOn: [":installMockery", protoc, goGet]) { outputPackage 'ethclientmocks' outputDir 'mocks/ethclientmocks' } - mock { - inputDir 'internal/statedistribution' - includeAll true - outputPackage 'statedistributionmocks' - outputDir 'mocks/statedistributionmocks' - } mock { inputDir 'internal/preparedtxdistribution' includeAll true diff --git a/core/go/internal/components/privatetxmgr.go b/core/go/internal/components/privatetxmgr.go index c0c12ab5a..eab0d744f 100644 --- a/core/go/internal/components/privatetxmgr.go +++ b/core/go/internal/components/privatetxmgr.go @@ -96,4 +96,5 @@ type PrivateTxManager interface { PrivateTransactionConfirmed(ctx context.Context, receipt *TxCompletion) BuildStateDistributions(ctx context.Context, tx *PrivateTransaction) (*StateDistributionSet, error) + BuildNullifiers(ctx context.Context, distributions []*StateDistributionWithData) (nullifiers []*NullifierUpsert, err error) } diff --git a/core/go/internal/components/transportmgr.go b/core/go/internal/components/transportmgr.go index ddba145c9..f0485c2f9 100644 --- a/core/go/internal/components/transportmgr.go +++ b/core/go/internal/components/transportmgr.go @@ -23,17 +23,16 @@ import ( "github.com/kaleido-io/paladin/config/pkg/pldconf" "github.com/kaleido-io/paladin/toolkit/pkg/plugintk" + "github.com/kaleido-io/paladin/toolkit/pkg/prototk" "github.com/kaleido-io/paladin/toolkit/pkg/tktypes" ) -type TransportMessage struct { - MessageID uuid.UUID `json:"id"` - CorrelationID *uuid.UUID `json:"correlationId"` - Component string `json:"component"` - Node string `json:"node"` // The node id to send the message to - ReplyTo string `json:"replyTo"` // The identity to respond to on the sending node - MessageType string `json:"messageType"` - Payload []byte `json:"payload"` +type FireAndForgetMessageSend struct { + Node 
string + Component prototk.PaladinMsg_Component + CorrelationID *uuid.UUID + MessageType string + Payload []byte } type ReliableMessageType string @@ -79,7 +78,8 @@ type TransportManagerToTransport interface { // TransportClient is the interface for a component that can receive messages from the transport manager type TransportClient interface { // Destination returns a string that should be matched with the Destination field of incomming messages to be routed to this client - Destination() string + Destination() prototk.PaladinMsg_Component + // This function is used by the transport manager to deliver messages to the engine. // // The implementation of this function: @@ -107,7 +107,7 @@ type TransportClient interface { // It delivers messages to this function: // - in whatever order they are received from the transport plugin(s), which is dependent on the _sender_ usually // - with whatever concurrency is performed by the transport plugin(s), which is commonly one per remote node, but that's not assured - ReceiveTransportMessage(context.Context, *TransportMessage) + HandlePaladinMsg(context.Context, *prototk.PaladinMsg) } type TransportManager interface { @@ -126,7 +126,7 @@ type TransportManager interface { // situation to recover from (although not critical path). // // at-most-once delivery semantics - Send(ctx context.Context, message *TransportMessage) error + Send(ctx context.Context, send *FireAndForgetMessageSend) error // Sends a message with at-least-once delivery semantics // diff --git a/core/go/internal/identityresolver/transport_client.go b/core/go/internal/identityresolver/transport_client.go index 3d6398315..ba5e246ea 100644 --- a/core/go/internal/identityresolver/transport_client.go +++ b/core/go/internal/identityresolver/transport_client.go @@ -31,7 +31,7 @@ func (p *identityResolver) Destination() string { return IDENTITY_RESOLVER_DESTINATION } -func (ir *identityResolver) ReceiveTransportMessage(ctx context.Context, message *components.TransportMessage) { +func (ir *identityResolver) HandlePaladinMsg(ctx context.Context, message *components.TransportMessage) { //TODO this need to become an ultra low latency, non blocking, handover to the event loop thread. // need some thought on how to handle errors, retries, buffering, swapping idle sequencers in and out of memory etc... 
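
A minimal sketch of how a client component wires into the refactored interface above, once the refactor settles. The TransportClient methods, the FireAndForgetMessageSend fields, and the PaladinMsg_Component enum are taken directly from the diff; the names exampleReceiver, sendExample, and the "ExampleEvent" message type are hypothetical, for illustration only, and are not part of this change set.

package example

import (
	"context"

	"github.com/kaleido-io/paladin/core/internal/components"
	"github.com/kaleido-io/paladin/toolkit/pkg/log"
	"github.com/kaleido-io/paladin/toolkit/pkg/prototk"
)

// exampleReceiver is a hypothetical component registered with the transport manager
type exampleReceiver struct {
	transportManager components.TransportManager
}

// Destination now routes inbound messages on the strongly typed component enum,
// rather than matching a free-form destination string
func (r *exampleReceiver) Destination() prototk.PaladinMsg_Component {
	return prototk.PaladinMsg_TRANSACTION_ENGINE
}

// HandlePaladinMsg is invoked with whatever ordering/concurrency the sending
// transport provides, so it should hand off quickly rather than block
func (r *exampleReceiver) HandlePaladinMsg(ctx context.Context, msg *prototk.PaladinMsg) {
	log.L(ctx).Debugf("received %s (id=%s)", msg.MessageType, msg.MessageId)
}

// sendExample shows the fire-and-forget path: the target node is a field of the
// send request itself, no longer carried inside the message body
func (r *exampleReceiver) sendExample(ctx context.Context, node string, payload []byte) error {
	return r.transportManager.Send(ctx, &components.FireAndForgetMessageSend{
		Node:        node,
		Component:   prototk.PaladinMsg_TRANSACTION_ENGINE,
		MessageType: "ExampleEvent",
		Payload:     payload,
	})
}

The two directional changes visible here: the routing key for inbound delivery becomes the component enum rather than a destination string, and the target node moves off the message onto the send request, so per-transport checks such as the replyTo validation removed earlier in this series are no longer needed.
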
diff --git a/core/go/internal/msgs/en_errors.go b/core/go/internal/msgs/en_errors.go index 8e6a74397..843fe24eb 100644 --- a/core/go/internal/msgs/en_errors.go +++ b/core/go/internal/msgs/en_errors.go @@ -359,7 +359,6 @@ var ( MsgTransportNodeNameNotConfigured = ffe("PD012002", "nodeName must be configured to set the identity of the local node") MsgTransportNoTransportsConfiguredForNode = ffe("PD012003", "None of the transports registered by node '%s' are configured locally on this node: %v") MsgTransportDetailsNotAvailable = ffe("PD012004", "Transport '%s' not available for node '%s'") - MsgTransportInvalidNodeReceived = ffe("PD012005", "Message received on node '%s' but is addressed for node '%s'") MsgTransportInvalidReplyToReceived = ffe("PD012006", "Message received with invalid replyTo destination: '%s'") MsgTransportInvalidDestinationSend = ffe("PD012007", "Message has invalid destination for sending from local node '%s': '%s'") MsgTransportInvalidReplyToSend = ffe("PD012008", "Message has invalid replyTo destination: '%s'") diff --git a/core/go/internal/preparedtxdistribution/transport_client.go b/core/go/internal/preparedtxdistribution/transport_client.go index 0613811be..5354199d0 100644 --- a/core/go/internal/preparedtxdistribution/transport_client.go +++ b/core/go/internal/preparedtxdistribution/transport_client.go @@ -32,8 +32,8 @@ func (sd *preparedTransactionDistributer) Destination() string { return PREPARED_TRANSACTION_DISTRIBUTER_DESTINATION } -func (sd *preparedTransactionDistributer) ReceiveTransportMessage(ctx context.Context, message *components.TransportMessage) { - log.L(ctx).Debugf("preparedTransactionDistributer:ReceiveTransportMessage") +func (sd *preparedTransactionDistributer) HandlePaladinMsg(ctx context.Context, message *components.TransportMessage) { + log.L(ctx).Debugf("preparedTransactionDistributer:HandlePaladinMsg") messagePayload := message.Payload switch message.MessageType { diff --git a/core/go/internal/privatetxnmgr/assemble_coordinator.go b/core/go/internal/privatetxnmgr/assemble_coordinator.go index 726690794..4ec411e7a 100644 --- a/core/go/internal/privatetxnmgr/assemble_coordinator.go +++ b/core/go/internal/privatetxnmgr/assemble_coordinator.go @@ -22,7 +22,6 @@ import ( "github.com/google/uuid" "github.com/kaleido-io/paladin/core/internal/components" "github.com/kaleido-io/paladin/core/internal/privatetxnmgr/ptmgrtypes" - "github.com/kaleido-io/paladin/core/internal/statedistribution" "github.com/kaleido-io/paladin/toolkit/pkg/log" "github.com/kaleido-io/paladin/toolkit/pkg/tktypes" ) @@ -40,7 +39,6 @@ type assembleCoordinator struct { contractAddress tktypes.EthAddress sequencerEnvironment ptmgrtypes.SequencerEnvironment requestTimeout time.Duration - stateDistributer statedistribution.StateDistributer localAssembler ptmgrtypes.LocalAssembler } @@ -51,7 +49,7 @@ type assembleRequest struct { transactionPreassembly *components.TransactionPreAssembly } -func NewAssembleCoordinator(ctx context.Context, nodeName string, maxPendingRequests int, components components.AllComponents, domainAPI components.DomainSmartContract, domainContext components.DomainContext, transportWriter ptmgrtypes.TransportWriter, contractAddress tktypes.EthAddress, sequencerEnvironment ptmgrtypes.SequencerEnvironment, requestTimeout time.Duration, stateDistributer statedistribution.StateDistributer, localAssembler ptmgrtypes.LocalAssembler) ptmgrtypes.AssembleCoordinator { +func NewAssembleCoordinator(ctx context.Context, nodeName string, maxPendingRequests int, components 
components.AllComponents, domainAPI components.DomainSmartContract, domainContext components.DomainContext, transportWriter ptmgrtypes.TransportWriter, contractAddress tktypes.EthAddress, sequencerEnvironment ptmgrtypes.SequencerEnvironment, requestTimeout time.Duration, localAssembler ptmgrtypes.LocalAssembler) ptmgrtypes.AssembleCoordinator {
 	return &assembleCoordinator{
 		ctx:      ctx,
 		nodeName: nodeName,
@@ -65,7 +63,6 @@ func NewAssembleCoordinator(ctx context.Context, nodeName string, maxPendingRequ
 		contractAddress:      contractAddress,
 		sequencerEnvironment: sequencerEnvironment,
 		requestTimeout:       requestTimeout,
-		stateDistributer:     stateDistributer,
 		localAssembler:       localAssembler,
 	}
 }
diff --git a/core/go/internal/privatetxnmgr/nullifier_distribution.go b/core/go/internal/privatetxnmgr/nullifier_distribution.go
new file mode 100644
index 000000000..54997c578
--- /dev/null
+++ b/core/go/internal/privatetxnmgr/nullifier_distribution.go
@@ -0,0 +1,91 @@
+/*
+ * Copyright © 2024 Kaleido, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package privatetxnmgr
+
+import (
+	"context"
+
+	"github.com/hyperledger/firefly-common/pkg/i18n"
+	"github.com/kaleido-io/paladin/core/internal/components"
+	"github.com/kaleido-io/paladin/core/internal/msgs"
+	"github.com/kaleido-io/paladin/toolkit/pkg/log"
+	"github.com/kaleido-io/paladin/toolkit/pkg/tktypes"
+)
+
+func (p *privateTxManager) buildNullifier(ctx context.Context, krc components.KeyResolutionContextLazyDB, s *components.StateDistributionWithData) (*components.NullifierUpsert, error) {
+	// We need to call the signing engine with the local identity to build the nullifier
+	log.L(ctx).Infof("Generating nullifier for state %s on node %s (algorithm=%s,verifierType=%s,payloadType=%s)",
+		s.StateID, p.nodeName, *s.NullifierAlgorithm, *s.NullifierVerifierType, *s.NullifierPayloadType)
+
+	// We require a fully qualified identifier for the local node in this function
+	identifier, node, err := tktypes.PrivateIdentityLocator(s.IdentityLocator).Validate(ctx, "", false)
+	if err != nil || node != p.nodeName {
+		return nil, i18n.WrapError(ctx, err, msgs.MsgStateDistributorNullifierNotLocal)
+	}
+
+	// Call the signing engine to build the nullifier
+	var nullifierBytes []byte
+	mapping, err := krc.KeyResolverLazyDB().ResolveKey(identifier, *s.NullifierAlgorithm, *s.NullifierVerifierType)
+	if err == nil {
+		nullifierBytes, err = p.components.KeyManager().Sign(ctx, mapping, *s.NullifierPayloadType, s.StateData.Bytes())
+	}
+	if err != nil || len(nullifierBytes) == 0 {
+		return nil, i18n.WrapError(ctx, err, msgs.MsgStateDistributorNullifierFail, s.StateID)
+	}
+	return &components.NullifierUpsert{
+		ID:    nullifierBytes,
+		State: tktypes.MustParseHexBytes(s.StateID),
+	}, nil
+}
+
+func (p *privateTxManager) withKeyResolutionContext(ctx context.Context, fn func(krc components.KeyResolutionContextLazyDB) error) (err error) {
+
+	// Unlikely we'll be resolving any new identities on this path - if we do, we'll start a new 
DB transaction + // Note: This requires we're not on an existing DB TX coming into this function + krc := p.components.KeyManager().NewKeyResolutionContextLazyDB(ctx) + defer func() { + if err == nil { + err = krc.Commit() + } else { + krc.Rollback() + } + }() + + err = fn(krc) + return err // note we require err to be set before return +} + +func (p *privateTxManager) BuildNullifiers(ctx context.Context, stateDistributions []*components.StateDistributionWithData) (nullifiers []*components.NullifierUpsert, err error) { + + nullifiers = []*components.NullifierUpsert{} + err = p.withKeyResolutionContext(ctx, func(krc components.KeyResolutionContextLazyDB) error { + for _, s := range stateDistributions { + if s.NullifierAlgorithm == nil || s.NullifierVerifierType == nil || s.NullifierPayloadType == nil { + log.L(ctx).Debugf("No nullifier required for state %s on node %s", s.StateID, p.nodeName) + continue + } + + nullifier, err := p.buildNullifier(ctx, krc, s) + if err != nil { + return err + } + + nullifiers = append(nullifiers, nullifier) + } + return nil + }) + return nullifiers, err +} diff --git a/core/go/internal/privatetxnmgr/private_txn_mgr.go b/core/go/internal/privatetxnmgr/private_txn_mgr.go index e24b8c50f..48ffdb0b8 100644 --- a/core/go/internal/privatetxnmgr/private_txn_mgr.go +++ b/core/go/internal/privatetxnmgr/private_txn_mgr.go @@ -27,7 +27,6 @@ import ( "github.com/kaleido-io/paladin/core/internal/preparedtxdistribution" "github.com/kaleido-io/paladin/core/internal/privatetxnmgr/ptmgrtypes" "github.com/kaleido-io/paladin/core/internal/privatetxnmgr/syncpoints" - "github.com/kaleido-io/paladin/core/internal/statedistribution" "gorm.io/gorm" "github.com/kaleido-io/paladin/core/internal/msgs" @@ -58,7 +57,6 @@ type privateTxManager struct { subscribers []components.PrivateTxEventSubscriber subscribersLock sync.Mutex syncPoints syncpoints.SyncPoints - stateDistributer statedistribution.StateDistributer preparedTransactionDistributer preparedtxdistribution.PreparedTransactionDistributer blockHeight int64 } @@ -81,13 +79,6 @@ func (p *privateTxManager) PostInit(c components.AllComponents) error { p.components = c p.nodeName = p.components.TransportManager().LocalNodeName() p.syncPoints = syncpoints.NewSyncPoints(p.ctx, &p.config.Writer, c.Persistence(), c.TxManager(), c.PublicTxManager()) - p.stateDistributer = statedistribution.NewStateDistributer( - p.ctx, - p.components.TransportManager(), - p.components.StateManager(), - p.components.KeyManager(), - p.components.Persistence(), - &p.config.StateDistributer) p.preparedTransactionDistributer = preparedtxdistribution.NewPreparedTransactionDistributer( p.ctx, p.nodeName, @@ -96,11 +87,7 @@ func (p *privateTxManager) PostInit(c components.AllComponents) error { p.components.Persistence(), &p.config.PreparedTransactionDistributer) - err := p.stateDistributer.Start(p.ctx) - if err != nil { - return err - } - err = p.preparedTransactionDistributer.Start(p.ctx) + err := p.preparedTransactionDistributer.Start(p.ctx) if err != nil { return err } diff --git a/core/go/internal/privatetxnmgr/private_txn_mgr_test.go b/core/go/internal/privatetxnmgr/private_txn_mgr_test.go index 522c71ee0..4c25628f0 100644 --- a/core/go/internal/privatetxnmgr/private_txn_mgr_test.go +++ b/core/go/internal/privatetxnmgr/private_txn_mgr_test.go @@ -856,14 +856,14 @@ func TestPrivateTxManagerRemoteNotaryEndorser(t *testing.T) { go func() { assert.Equal(t, remoteNodeName, args.Get(1).(*components.TransportMessage).Node) transportMessage := 
args.Get(1).(*components.TransportMessage) - remoteEngine.ReceiveTransportMessage(ctx, transportMessage) + remoteEngine.HandlePaladinMsg(ctx, transportMessage) }() }).Return(nil).Maybe() remoteEngineMocks.transportManager.On("Send", mock.Anything, mock.Anything).Run(func(args mock.Arguments) { go func() { transportMessage := args.Get(1).(*components.TransportMessage) - privateTxManager.ReceiveTransportMessage(ctx, transportMessage) + privateTxManager.HandlePaladinMsg(ctx, transportMessage) }() }).Return(nil).Maybe() @@ -1048,7 +1048,7 @@ func TestPrivateTxManagerRemoteNotaryEndorserRetry(t *testing.T) { //ignore the first delegate request and force a retry ignoredDelegateRequest = true } else { - remoteEngine.ReceiveTransportMessage(ctx, transportMessage) + remoteEngine.HandlePaladinMsg(ctx, transportMessage) } }() }).Return(nil).Maybe() @@ -1056,7 +1056,7 @@ func TestPrivateTxManagerRemoteNotaryEndorserRetry(t *testing.T) { remoteEngineMocks.transportManager.On("Send", mock.Anything, mock.Anything).Run(func(args mock.Arguments) { go func() { transportMessage := args.Get(1).(*components.TransportMessage) - privateTxManager.ReceiveTransportMessage(ctx, transportMessage) + privateTxManager.HandlePaladinMsg(ctx, transportMessage) }() }).Return(nil).Maybe() @@ -2004,7 +2004,7 @@ func TestPrivateTxManagerDependantTransactionEndorsedOutOfOrder(t *testing.T) { require.NoError(t, err) //now send the endorsements back - aliceEngine.ReceiveTransportMessage(ctx, &components.TransportMessage{ + aliceEngine.HandlePaladinMsg(ctx, &components.TransportMessage{ MessageType: "EndorsementResponse", Payload: endorsementResponse2bytes, }) @@ -2034,7 +2034,7 @@ func TestPrivateTxManagerDependantTransactionEndorsedOutOfOrder(t *testing.T) { require.NoError(t, err) //now send the final endorsement back - aliceEngine.ReceiveTransportMessage(ctx, &components.TransportMessage{ + aliceEngine.HandlePaladinMsg(ctx, &components.TransportMessage{ MessageType: "EndorsementResponse", Payload: endorsementResponse1Bytes, }) @@ -2683,7 +2683,7 @@ func mockNetwork(t *testing.T, transactionManagers []privateTransactionMgrForPac transportMessage := args.Get(1).(*components.TransportMessage) for _, tm := range transactionManagers { if tm.NodeName() == transportMessage.Node { - tm.ReceiveTransportMessage(context.Background(), transportMessage) + tm.HandlePaladinMsg(context.Background(), transportMessage) return } } diff --git a/core/go/internal/privatetxnmgr/sequencer.go b/core/go/internal/privatetxnmgr/sequencer.go index 95d1e92c9..6cf7c386f 100644 --- a/core/go/internal/privatetxnmgr/sequencer.go +++ b/core/go/internal/privatetxnmgr/sequencer.go @@ -30,7 +30,6 @@ import ( "github.com/kaleido-io/paladin/core/internal/preparedtxdistribution" "github.com/kaleido-io/paladin/core/internal/privatetxnmgr/ptmgrtypes" "github.com/kaleido-io/paladin/core/internal/privatetxnmgr/syncpoints" - "github.com/kaleido-io/paladin/core/internal/statedistribution" pbEngine "github.com/kaleido-io/paladin/core/pkg/proto/engine" "github.com/kaleido-io/paladin/toolkit/pkg/log" @@ -120,7 +119,6 @@ type Sequencer struct { publisher ptmgrtypes.Publisher identityResolver components.IdentityResolver syncPoints syncpoints.SyncPoints - stateDistributer statedistribution.StateDistributer preparedTransactionDistributer preparedtxdistribution.PreparedTransactionDistributer transportWriter ptmgrtypes.TransportWriter graph Graph @@ -143,7 +141,6 @@ func NewSequencer( publisher ptmgrtypes.Publisher, syncPoints syncpoints.SyncPoints, identityResolver 
components.IdentityResolver, - stateDistributer statedistribution.StateDistributer, preparedTransactionDistributer preparedtxdistribution.PreparedTransactionDistributer, transportWriter ptmgrtypes.TransportWriter, requestTimeout time.Duration, @@ -176,7 +173,6 @@ func NewSequencer( publisher: publisher, syncPoints: syncPoints, identityResolver: identityResolver, - stateDistributer: stateDistributer, preparedTransactionDistributer: preparedTransactionDistributer, transportWriter: transportWriter, graph: NewGraph(), @@ -225,7 +221,6 @@ func NewSequencer( contractAddress, newSequencer.environment, confutil.DurationMin(sequencerConfig.AssembleRequestTimeout, 1*time.Millisecond, *pldconf.PrivateTxManagerDefaults.Sequencer.AssembleRequestTimeout), - stateDistributer, newSequencer, ) diff --git a/core/go/internal/privatetxnmgr/sequencer_test.go b/core/go/internal/privatetxnmgr/sequencer_test.go index e53ed7e76..e3ed7df23 100644 --- a/core/go/internal/privatetxnmgr/sequencer_test.go +++ b/core/go/internal/privatetxnmgr/sequencer_test.go @@ -28,7 +28,6 @@ import ( "github.com/kaleido-io/paladin/core/mocks/componentmocks" "github.com/kaleido-io/paladin/core/mocks/preparedtxdistributionmocks" "github.com/kaleido-io/paladin/core/mocks/privatetxnmgrmocks" - "github.com/kaleido-io/paladin/core/mocks/statedistributionmocks" "github.com/kaleido-io/paladin/core/pkg/persistence" "github.com/kaleido-io/paladin/toolkit/pkg/pldapi" @@ -52,7 +51,6 @@ type sequencerDepencyMocks struct { endorsementGatherer *privatetxnmgrmocks.EndorsementGatherer publisher *privatetxnmgrmocks.Publisher identityResolver *componentmocks.IdentityResolver - stateDistributer *statedistributionmocks.StateDistributer preparedTransactionDistributer *preparedtxdistributionmocks.PreparedTransactionDistributer txManager *componentmocks.TXManager pubTxManager *componentmocks.PublicTxManager @@ -77,7 +75,6 @@ func newSequencerForTesting(t *testing.T, ctx context.Context, domainAddress *tk endorsementGatherer: privatetxnmgrmocks.NewEndorsementGatherer(t), publisher: privatetxnmgrmocks.NewPublisher(t), identityResolver: componentmocks.NewIdentityResolver(t), - stateDistributer: statedistributionmocks.NewStateDistributer(t), preparedTransactionDistributer: preparedtxdistributionmocks.NewPreparedTransactionDistributer(t), txManager: componentmocks.NewTXManager(t), pubTxManager: componentmocks.NewPublicTxManager(t), diff --git a/core/go/internal/privatetxnmgr/syncpoints/dispatch.go b/core/go/internal/privatetxnmgr/syncpoints/dispatch.go index 707fcac3c..4f9719da4 100644 --- a/core/go/internal/privatetxnmgr/syncpoints/dispatch.go +++ b/core/go/internal/privatetxnmgr/syncpoints/dispatch.go @@ -22,7 +22,6 @@ import ( "github.com/google/uuid" "github.com/kaleido-io/paladin/core/internal/components" "github.com/kaleido-io/paladin/core/internal/preparedtxdistribution" - "github.com/kaleido-io/paladin/core/internal/statedistribution" "github.com/kaleido-io/paladin/toolkit/pkg/log" "github.com/kaleido-io/paladin/toolkit/pkg/tktypes" "gorm.io/gorm" @@ -34,7 +33,6 @@ type dispatchOperation struct { privateDispatches []*components.ValidatedTransaction preparedTransactions []*components.PrepareTransactionWithRefs preparedTxnDistributions []*preparedtxdistribution.PreparedTxnDistributionPersisted - stateDistributions []*statedistribution.StateDistributionPersisted } type DispatchPersisted struct { @@ -62,17 +60,6 @@ type DispatchBatch struct { // to submit public transactions. 
func (s *syncPoints) PersistDispatchBatch(dCtx components.DomainContext, contractAddress tktypes.EthAddress, dispatchBatch *DispatchBatch, stateDistributions []*components.StateDistributionWithData, preparedTxnDistributions []*preparedtxdistribution.PreparedTxnDistribution) error { - stateDistributionsPersisted := make([]*statedistribution.StateDistributionPersisted, 0, len(stateDistributions)) - for _, stateDistribution := range stateDistributions { - stateDistributionsPersisted = append(stateDistributionsPersisted, &statedistribution.StateDistributionPersisted{ - ID: stateDistribution.ID, - StateID: tktypes.MustParseHexBytes(stateDistribution.StateID), - IdentityLocator: stateDistribution.IdentityLocator, - DomainName: stateDistribution.Domain, - ContractAddress: *tktypes.MustEthAddress(stateDistribution.ContractAddress), - }) - } - preparedTxnDistributionsPersisted := make([]*preparedtxdistribution.PreparedTxnDistributionPersisted, 0, len(dispatchBatch.PreparedTransactions)) for _, preparedTxnDistribution := range preparedTxnDistributions { preparedTxnDistributionsPersisted = append(preparedTxnDistributionsPersisted, &preparedtxdistribution.PreparedTxnDistributionPersisted{ @@ -93,7 +80,6 @@ func (s *syncPoints) PersistDispatchBatch(dCtx components.DomainContext, contrac privateDispatches: dispatchBatch.PrivateDispatches, preparedTransactions: dispatchBatch.PreparedTransactions, preparedTxnDistributions: preparedTxnDistributionsPersisted, - stateDistributions: stateDistributionsPersisted, }, }) diff --git a/core/go/internal/privatetxnmgr/transaction_flow_test.go b/core/go/internal/privatetxnmgr/transaction_flow_test.go index 44b31a271..dee921239 100644 --- a/core/go/internal/privatetxnmgr/transaction_flow_test.go +++ b/core/go/internal/privatetxnmgr/transaction_flow_test.go @@ -27,7 +27,6 @@ import ( "github.com/kaleido-io/paladin/core/mocks/componentmocks" "github.com/kaleido-io/paladin/core/mocks/privatetxnmgrmocks" "github.com/kaleido-io/paladin/core/mocks/prvtxsyncpointsmocks" - "github.com/kaleido-io/paladin/core/mocks/statedistributionmocks" "github.com/kaleido-io/paladin/toolkit/pkg/algorithms" "github.com/kaleido-io/paladin/toolkit/pkg/prototk" "github.com/kaleido-io/paladin/toolkit/pkg/signpayloads" @@ -53,7 +52,6 @@ type transactionFlowDepencyMocks struct { transportWriter *privatetxnmgrmocks.TransportWriter environment *privatetxnmgrmocks.SequencerEnvironment coordinatorSelector *privatetxnmgrmocks.CoordinatorSelector - stateDistributer *statedistributionmocks.StateDistributer localAssembler *privatetxnmgrmocks.LocalAssembler } @@ -74,7 +72,6 @@ func newTransactionFlowForTesting(t *testing.T, ctx context.Context, transaction transportWriter: privatetxnmgrmocks.NewTransportWriter(t), environment: privatetxnmgrmocks.NewSequencerEnvironment(t), coordinatorSelector: privatetxnmgrmocks.NewCoordinatorSelector(t), - stateDistributer: statedistributionmocks.NewStateDistributer(t), localAssembler: privatetxnmgrmocks.NewLocalAssembler(t), } contractAddress := tktypes.RandAddress() diff --git a/core/go/internal/privatetxnmgr/transport_receiver.go b/core/go/internal/privatetxnmgr/transport_receiver.go index 3d82092a4..b42298e5d 100644 --- a/core/go/internal/privatetxnmgr/transport_receiver.go +++ b/core/go/internal/privatetxnmgr/transport_receiver.go @@ -26,7 +26,7 @@ func (p *privateTxManager) Destination() string { return components.PRIVATE_TX_MANAGER_DESTINATION } -func (p *privateTxManager) ReceiveTransportMessage(ctx context.Context, message *components.TransportMessage) { +func (p 
*privateTxManager) HandlePaladinMsg(ctx context.Context, message *components.TransportMessage) { //TODO this need to become an ultra low latency, non blocking, handover to the event loop thread. // need some thought on how to handle errors, retries, buffering, swapping idle sequencers in and out of memory etc... diff --git a/core/go/internal/statedistribution/acknowledgment_writer.go b/core/go/internal/statedistribution/acknowledgment_writer.go deleted file mode 100644 index 59ce28dd5..000000000 --- a/core/go/internal/statedistribution/acknowledgment_writer.go +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright © 2024 Kaleido, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the - * specific language governing permissions and limitations under the License. - * - * SPDX-License-Identifier: Apache-2.0 - */ - -package statedistribution - -import ( - "context" - - "github.com/google/uuid" - "github.com/kaleido-io/paladin/config/pkg/pldconf" - "github.com/kaleido-io/paladin/core/internal/flushwriter" - "github.com/kaleido-io/paladin/core/pkg/persistence" - "github.com/kaleido-io/paladin/toolkit/pkg/log" - "gorm.io/gorm" - "gorm.io/gorm/clause" -) - -type acknowledgementWriterNoResult struct{} -type acknowledgementWriteOperation struct { - StateDistributionID string -} - -type acknowledgementWriter struct { - flushWriter flushwriter.Writer[*acknowledgementWriteOperation, *acknowledgementWriterNoResult] -} - -func NewAcknowledgementWriter(ctx context.Context, persistence persistence.Persistence, conf *pldconf.FlushWriterConfig) *acknowledgementWriter { - aw := &acknowledgementWriter{} - aw.flushWriter = flushwriter.NewWriter(ctx, aw.runBatch, persistence, conf, &pldconf.DistributerWriterConfigDefaults) - return aw -} - -func (wo *acknowledgementWriteOperation) WriteKey() string { - //no ordering requirements so just assign a worker at random for each write - return wo.StateDistributionID -} - -type stateDistributionAcknowledgement struct { - StateDistribution string `json:"stateDistribution" gorm:"column:state_distribution"` - ID string `json:"id" gorm:"column:id"` -} - -func (aw *acknowledgementWriter) runBatch(ctx context.Context, tx *gorm.DB, values []*acknowledgementWriteOperation) (func(error), []flushwriter.Result[*acknowledgementWriterNoResult], error) { - log.L(ctx).Debugf("acknowledgementWriter:runBatch %d acknowledgements", len(values)) - - acknowledgements := make([]*stateDistributionAcknowledgement, 0, len(values)) - for _, value := range values { - acknowledgements = append(acknowledgements, &stateDistributionAcknowledgement{ - StateDistribution: value.StateDistributionID, - ID: uuid.New().String(), - }) - } - - err := tx. - Table("state_distribution_acknowledgments"). - Clauses(clause.OnConflict{ - DoNothing: true, // immutable - }). - Create(acknowledgements). 
- Error - if err != nil { - log.L(ctx).Errorf("Error persisting state distribution acknowledgements: %s", err) - } - - // We don't actually provide any result, so just build an array of nil results - return nil, make([]flushwriter.Result[*acknowledgementWriterNoResult], len(values)), err - -} - -func (aw *acknowledgementWriter) Start() { - aw.flushWriter.Start() -} - -func (aw *acknowledgementWriter) Stop() { - aw.flushWriter.Shutdown() -} - -func (aw *acknowledgementWriter) Queue(ctx context.Context, stateDistributionID string) { - aw.flushWriter.Queue(ctx, &acknowledgementWriteOperation{ - StateDistributionID: stateDistributionID, - }) -} diff --git a/core/go/internal/statedistribution/received_state_writer.go b/core/go/internal/statedistribution/received_state_writer.go deleted file mode 100644 index 481ff83ba..000000000 --- a/core/go/internal/statedistribution/received_state_writer.go +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright © 2024 Kaleido, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the - * specific language governing permissions and limitations under the License. - * - * SPDX-License-Identifier: Apache-2.0 - */ - -package statedistribution - -import ( - "context" - - "github.com/kaleido-io/paladin/config/pkg/pldconf" - "github.com/kaleido-io/paladin/core/internal/components" - "github.com/kaleido-io/paladin/core/internal/flushwriter" - "github.com/kaleido-io/paladin/core/pkg/persistence" - "github.com/kaleido-io/paladin/toolkit/pkg/log" - "github.com/kaleido-io/paladin/toolkit/pkg/tktypes" - "gorm.io/gorm" -) - -type receivedStateWriterNoResult struct{} -type receivedStateWriteOperation struct { - DomainName string - ContractAddress tktypes.EthAddress - SchemaID tktypes.Bytes32 - StateDataJson tktypes.RawJSON - Nullifier *components.NullifierUpsert -} - -type receivedStateWriter struct { - flushWriter flushwriter.Writer[*receivedStateWriteOperation, *receivedStateWriterNoResult] - stateManager components.StateManager -} - -func NewReceivedStateWriter(ctx context.Context, stateManager components.StateManager, persistence persistence.Persistence, conf *pldconf.FlushWriterConfig) *receivedStateWriter { - rsw := &receivedStateWriter{ - stateManager: stateManager, - } - rsw.flushWriter = flushwriter.NewWriter(ctx, rsw.runBatch, persistence, conf, &pldconf.DistributerWriterConfigDefaults) - return rsw -} - -func (wo *receivedStateWriteOperation) WriteKey() string { - return wo.DomainName -} - -func (rsw *receivedStateWriter) runBatch(ctx context.Context, tx *gorm.DB, values []*receivedStateWriteOperation) (func(error), []flushwriter.Result[*receivedStateWriterNoResult], error) { - log.L(ctx).Debugf("receivedStateWriter:runBatch %d acknowledgements", len(values)) - - if len(values) == 0 { - return nil, nil, nil - } - - type insertsForDomain struct { - nullifiers []*components.NullifierUpsert - stateUpserts []*components.StateUpsertOutsideContext - } - - byDomain := make(map[string]*insertsForDomain) - - for _, receivedStateWriteOperation := range values { - - domainOps := byDomain[receivedStateWriteOperation.DomainName] - if domainOps == nil { - domainOps = 
&insertsForDomain{} - byDomain[receivedStateWriteOperation.DomainName] = domainOps - } - - domainOps.stateUpserts = append(domainOps.stateUpserts, &components.StateUpsertOutsideContext{ - ContractAddress: receivedStateWriteOperation.ContractAddress, - SchemaID: receivedStateWriteOperation.SchemaID, - Data: receivedStateWriteOperation.StateDataJson, - }) - if receivedStateWriteOperation.Nullifier != nil { - domainOps.nullifiers = append(domainOps.nullifiers, receivedStateWriteOperation.Nullifier) - } - } - - for domainName, domainOps := range byDomain { - _, err := rsw.stateManager.WriteReceivedStates(ctx, tx, domainName, domainOps.stateUpserts) - - if err == nil && len(domainOps.nullifiers) > 0 { - err = rsw.stateManager.WriteNullifiersForReceivedStates(ctx, tx, domainName, domainOps.nullifiers) - } - - if err != nil { - log.L(ctx).Errorf("Error writing received states: %s", err) - return nil, nil, err - } - - } - - // We don't actually provide any result, so just build an array of nil results - return nil, make([]flushwriter.Result[*receivedStateWriterNoResult], len(values)), nil - -} - -func (rsw *receivedStateWriter) Start() { - rsw.flushWriter.Start() -} - -func (rsw *receivedStateWriter) Stop() { - rsw.flushWriter.Shutdown() -} - -func (rsw *receivedStateWriter) QueueAndWait(ctx context.Context, domainName string, contractAddress tktypes.EthAddress, schemaID tktypes.Bytes32, stateDataJson tktypes.RawJSON, nullifier *components.NullifierUpsert) error { - log.L(ctx).Debugf("receivedStateWriter:QueueAndWait domainName=%s contractAddress=%s schemaID=%s", domainName, contractAddress, schemaID) - op := rsw.flushWriter.Queue(ctx, &receivedStateWriteOperation{ - DomainName: domainName, - ContractAddress: contractAddress, - SchemaID: schemaID, - StateDataJson: stateDataJson, - Nullifier: nullifier, - }) - _, err := op.WaitFlushed(ctx) - if err != nil { - log.L(ctx).Errorf("Error waiting for state distribution write: %s", err) - } - return err -} diff --git a/core/go/internal/statedistribution/state_distributer.go b/core/go/internal/statedistribution/state_distributer.go deleted file mode 100644 index e23f3ee5c..000000000 --- a/core/go/internal/statedistribution/state_distributer.go +++ /dev/null @@ -1,290 +0,0 @@ -/* - * Copyright © 2024 Kaleido, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the - * specific language governing permissions and limitations under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - */ - -package statedistribution - -import ( - "context" - "time" - - "github.com/hyperledger/firefly-common/pkg/i18n" - "github.com/kaleido-io/paladin/config/pkg/pldconf" - "github.com/kaleido-io/paladin/core/internal/components" - "github.com/kaleido-io/paladin/core/internal/msgs" - "github.com/kaleido-io/paladin/core/pkg/persistence" - pb "github.com/kaleido-io/paladin/core/pkg/proto/engine" - "github.com/kaleido-io/paladin/toolkit/pkg/log" - "github.com/kaleido-io/paladin/toolkit/pkg/retry" - "github.com/kaleido-io/paladin/toolkit/pkg/tktypes" -) - -const RETRY_TIMEOUT = 5 * time.Second - -func NewStateDistributer( - ctx context.Context, - transportManager components.TransportManager, - stateManager components.StateManager, - keyManager components.KeyManager, - persistence persistence.Persistence, - conf *pldconf.DistributerConfig, -) StateDistributer { - sd := &stateDistributer{ - persistence: persistence, - inputChan: make(chan *components.StateDistributionWithData), - retryChan: make(chan string), - acknowledgedChan: make(chan string), - pendingMap: make(map[string]*components.StateDistributionWithData), - stateManager: stateManager, - keyManager: keyManager, - transportManager: transportManager, - localNodeName: transportManager.LocalNodeName(), - retry: retry.NewRetryIndefinite(&pldconf.RetryConfig{}, &pldconf.GenericRetryDefaults.RetryConfig), - } - sd.acknowledgementWriter = NewAcknowledgementWriter(ctx, sd.persistence, &conf.AcknowledgementWriter) - sd.receivedStateWriter = NewReceivedStateWriter(ctx, stateManager, persistence, &conf.ReceivedObjectWriter) - - return sd -} - -type StateDistributionPersisted struct { - Created tktypes.Timestamp `json:"created" gorm:"column:created;autoCreateTime:nano"` - ID string `json:"id"` - StateID tktypes.HexBytes `json:"stateID"` - IdentityLocator string `json:"identityLocator"` - DomainName string `json:"domainName"` - ContractAddress tktypes.EthAddress `json:"contractAddress"` - NullifierAlgorithm *string `json:"nullifierAlgorithm,omitempty"` - NullifierVerifierType *string `json:"nullifierVerifierType,omitempty"` - NullifierPayloadType *string `json:"nullifierPayloadType,omitempty"` -} - -/* -StateDistributer is a component that is responsible for distributing state to remote parties - - it runs in its own goroutine and periodically sends states to the intended recipients - until each recipient has acknowledged receipt of the state. 
- - This operates on in-memory data but will initialize from persistent storage on startup -*/ -type StateDistributer interface { - Start(ctx context.Context) error - Stop(ctx context.Context) - BuildNullifiers(ctx context.Context, stateDistributions []*components.StateDistributionWithData) ([]*components.NullifierUpsert, error) - DistributeStates(ctx context.Context, stateDistributions []*components.StateDistributionWithData) - HandleStateProducedEvent(ctx context.Context, stateProducedEvent *pb.StateProducedEvent, distributingNode string) - HandleStateAcknowledgedEvent(ctx context.Context, messagePayload []byte) -} - -type stateDistributer struct { - runCtx context.Context - stopRunCtx context.CancelFunc - persistence persistence.Persistence - stateManager components.StateManager - keyManager components.KeyManager - inputChan chan *components.StateDistributionWithData - retryChan chan string - acknowledgedChan chan string - pendingMap map[string]*components.StateDistributionWithData - acknowledgementWriter *acknowledgementWriter - receivedStateWriter *receivedStateWriter - transportManager components.TransportManager - localNodeName string - retry *retry.Retry -} - -func (sd *stateDistributer) Start(bgCtx context.Context) error { - sd.runCtx, sd.stopRunCtx = context.WithCancel(bgCtx) - ctx := sd.runCtx - log.L(ctx).Info("stateDistributer:Start") - - sd.acknowledgementWriter.Start() - sd.receivedStateWriter.Start() - - // TODO: This needs to be a worker per-peer - probably a whole state distributor per peer that can be swapped in/out. - // Currently it only runs on startup, and pushes all state distributions from before the startup time into the distributor. - startTime := tktypes.TimestampNow() - go func() { - page := 0 - dispatched := 0 - var lastEntry *StateDistributionPersisted - finished := false - for !finished { - err := sd.retry.Do(ctx, func(attempt int) (retryable bool, err error) { - page++ - var stateDistributions []*StateDistributionPersisted - query := sd.persistence.DB().Table("state_distributions"). - Select("state_distributions.*"). - Joins("LEFT JOIN state_distribution_acknowledgments ON state_distributions.id = state_distribution_acknowledgments.state_distribution"). - Where("state_distribution_acknowledgments.id IS NULL"). - Where("created < ?", startTime). - Order("created"). 
- Limit(100) - if lastEntry != nil { - query = query.Where("created > ?", lastEntry.Created) - } - err = query.Find(&stateDistributions).Error - - if err != nil { - log.L(ctx).Errorf("Error getting state distributions: %s", err) - return true, err - } - - log.L(ctx).Infof("stateDistributer loaded %d state distributions on startup (page=%d)", len(stateDistributions), page) - - for _, stateDistribution := range stateDistributions { - state, err := sd.stateManager.GetState(ctx, sd.persistence.DB(), /* no TX for now */ - stateDistribution.DomainName, stateDistribution.ContractAddress, stateDistribution.StateID, true, false) - if err != nil { - log.L(ctx).Errorf("Error getting state: %s", err) - continue - } - - sd.inputChan <- &components.StateDistributionWithData{ - ID: stateDistribution.ID, - StateID: stateDistribution.StateID.String(), - IdentityLocator: stateDistribution.IdentityLocator, - Domain: stateDistribution.DomainName, - ContractAddress: stateDistribution.ContractAddress.String(), - SchemaID: state.Schema.String(), - StateDataJson: string(state.Data), - NullifierAlgorithm: stateDistribution.NullifierAlgorithm, - NullifierVerifierType: stateDistribution.NullifierVerifierType, - NullifierPayloadType: stateDistribution.NullifierPayloadType, - } - - dispatched++ - lastEntry = stateDistribution - } - finished = (len(stateDistributions) == 0) - return false, nil - }) - if err != nil { - log.L(ctx).Warnf("exiting before sending all recovered state distributions") - } - } - log.L(ctx).Infof("stateDistributer finished startup recovery after dispatching %d distributions", dispatched) - }() - - go func() { - log.L(ctx).Info("stateDistributer:Loop starting loop") - for { - log.L(ctx).Debug("stateDistributer:Loop waiting for next event") - - select { - case <-ctx.Done(): - return - case stateDistributionID := <-sd.acknowledgedChan: - _, stillPending := sd.pendingMap[stateDistributionID] - if stillPending { - log.L(ctx).Debugf("stateDistributer:Loop processing acknowledgment %s", stateDistributionID) - - delete(sd.pendingMap, stateDistributionID) - } else { - log.L(ctx).Debugf("stateDistributer:Loop already received acknowledgment %s", stateDistributionID) - - } - //if we didn't find it in the map, it was already acknowledged - - case stateDistributionID := <-sd.retryChan: - - pendingDistribution, stillPending := sd.pendingMap[stateDistributionID] - if stillPending { - log.L(ctx).Debugf("stateDistributer:Loop retrying %s", stateDistributionID) - sd.sendState(ctx, pendingDistribution) - } - //if we didn't find it in the map, it was already acknowledged - - case stateDistribution := <-sd.inputChan: - log.L(ctx).Debugf("stateDistributer:Loop new distribution %s", stateDistribution.ID) - - sd.pendingMap[stateDistribution.ID] = stateDistribution - sd.sendState(ctx, stateDistribution) - - } - } - }() - return nil -} - -func (sd *stateDistributer) buildNullifier(ctx context.Context, krc components.KeyResolutionContextLazyDB, s *components.StateDistributionWithData) (*components.NullifierUpsert, error) { - // We need to call the signing engine with the local identity to build the nullifier - log.L(ctx).Infof("Generating nullifier for state %s on node %s (algorithm=%s,verifierType=%s,payloadType=%s)", - s.StateID, sd.localNodeName, *s.NullifierAlgorithm, *s.NullifierVerifierType, *s.NullifierPayloadType) - - // We require a fully qualified identifier for the local node in this function - identifier, node, err := tktypes.PrivateIdentityLocator(s.IdentityLocator).Validate(ctx, "", false) - if err != nil || 
node != sd.localNodeName { - return nil, i18n.WrapError(ctx, err, msgs.MsgStateDistributorNullifierNotLocal) - } - - // Call the signing engine to build the nullifier - var nulliferBytes []byte - mapping, err := krc.KeyResolverLazyDB().ResolveKey(identifier, *s.NullifierAlgorithm, *s.NullifierVerifierType) - if err == nil { - nulliferBytes, err = sd.keyManager.Sign(ctx, mapping, *s.NullifierPayloadType, []byte(s.StateDataJson)) - } - if err != nil || len(nulliferBytes) == 0 { - return nil, i18n.WrapError(ctx, err, msgs.MsgStateDistributorNullifierFail, s.StateID) - } - return &components.NullifierUpsert{ - ID: nulliferBytes, - State: tktypes.MustParseHexBytes(s.StateID), - }, nil -} - -func (sd *stateDistributer) withKeyResolutionContext(ctx context.Context, fn func(krc components.KeyResolutionContextLazyDB) error) (err error) { - - // Unlikely we'll be resolving any new identities on this path - if we do, we'll start a new DB transaction - // Note: This requires we're not on an existing DB TX coming into this function - krc := sd.keyManager.NewKeyResolutionContextLazyDB(ctx) - defer func() { - if err == nil { - err = krc.Commit() - } else { - krc.Rollback() - } - }() - - err = fn(krc) - return err // note we require err to be set before return -} - -func (sd *stateDistributer) BuildNullifiers(ctx context.Context, stateDistributions []*components.StateDistributionWithData) (nullifiers []*components.NullifierUpsert, err error) { - - nullifiers = []*components.NullifierUpsert{} - err = sd.withKeyResolutionContext(ctx, func(krc components.KeyResolutionContextLazyDB) error { - for _, s := range stateDistributions { - if s.NullifierAlgorithm == nil || s.NullifierVerifierType == nil || s.NullifierPayloadType == nil { - log.L(ctx).Debugf("No nullifier required for state %s on node %s", s.ID, sd.localNodeName) - continue - } - - nullifier, err := sd.buildNullifier(ctx, krc, s) - if err != nil { - return err - } - - nullifiers = append(nullifiers, nullifier) - } - return nil - }) - return nullifiers, err -} - -func (sd *stateDistributer) Stop(ctx context.Context) { - sd.stopRunCtx() - sd.acknowledgementWriter.Stop() - sd.receivedStateWriter.Stop() -} diff --git a/core/go/internal/statedistribution/state_distributer_test.go b/core/go/internal/statedistribution/state_distributer_test.go deleted file mode 100644 index 95c9b9a4a..000000000 --- a/core/go/internal/statedistribution/state_distributer_test.go +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Copyright © 2024 Kaleido, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the - * specific language governing permissions and limitations under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - */ - -package statedistribution - -import ( - "context" - "fmt" - "testing" - - "github.com/google/uuid" - "github.com/kaleido-io/paladin/config/pkg/confutil" - "github.com/kaleido-io/paladin/config/pkg/pldconf" - "github.com/kaleido-io/paladin/core/internal/components" - "github.com/kaleido-io/paladin/core/mocks/componentmocks" - "github.com/kaleido-io/paladin/core/pkg/persistence/mockpersistence" - "github.com/kaleido-io/paladin/toolkit/pkg/pldapi" - "github.com/kaleido-io/paladin/toolkit/pkg/tktypes" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/require" -) - -type mockComponents struct { - db *mockpersistence.SQLMockProvider - stateManager *componentmocks.StateManager - keyManager *componentmocks.KeyManager - transportManager *componentmocks.TransportManager - keyResolver *componentmocks.KeyResolver -} - -func newTestStateDistributor(t *testing.T) (context.Context, *mockComponents, *stateDistributer) { - ctx := context.Background() - - mc := &mockComponents{ - stateManager: componentmocks.NewStateManager(t), - keyManager: componentmocks.NewKeyManager(t), - transportManager: componentmocks.NewTransportManager(t), - keyResolver: componentmocks.NewKeyResolver(t), - } - mc.transportManager.On("LocalNodeName").Return("node1") - mkrc := componentmocks.NewKeyResolutionContextLazyDB(t) - mkrc.On("KeyResolverLazyDB").Return(mc.keyResolver).Maybe() - mkrc.On("Commit").Return(nil).Maybe() - mkrc.On("Rollback").Return().Maybe() - mc.keyManager.On("NewKeyResolutionContextLazyDB", mock.Anything).Return(mkrc).Maybe() - - mdb, err := mockpersistence.NewSQLMockProvider() - require.NoError(t, err) - mc.db = mdb - - sd := NewStateDistributer(ctx, mc.transportManager, mc.stateManager, mc.keyManager, mc.db.P, &pldconf.DistributerConfig{}) - - return ctx, mc, sd.(*stateDistributer) - -} - -func TestBuildNullifiersNoOp(t *testing.T) { - - ctx, _, sd := newTestStateDistributor(t) - - nullifiers, err := sd.BuildNullifiers(ctx, []*components.StateDistributionWithData{ - { - ID: uuid.New().String(), - StateID: "id1", - IdentityLocator: "target@node1", - }, - }) - require.NoError(t, err) - assert.Empty(t, nullifiers) - -} - -func TestBuildNullifiersOk(t *testing.T) { - - ctx, mc, sd := newTestStateDistributor(t) - - keyMapping := &pldapi.KeyMappingAndVerifier{ - KeyMappingWithPath: &pldapi.KeyMappingWithPath{ - KeyMapping: &pldapi.KeyMapping{ - Identifier: "target", - Wallet: "wallet1", - KeyHandle: "key1", - }, - }, - } - - mc.keyResolver.On("ResolveKey", "target", "nullifier_algo", "nullifier_verifier_type"). - Return(keyMapping, nil) - - nullifierBytes := tktypes.RandBytes(32) - mc.keyManager.On("Sign", ctx, keyMapping, "nullifier_payload_type", []byte(`{"state":"data"}`)). 
- Return(nullifierBytes, nil) - - stateID := tktypes.HexBytes(tktypes.RandBytes(32)) - nullifiers, err := sd.BuildNullifiers(ctx, []*components.StateDistributionWithData{ - { - ID: uuid.New().String(), - StateID: stateID.String(), - IdentityLocator: "target@node1", - StateDataJson: `{"state":"data"}`, - NullifierAlgorithm: confutil.P("nullifier_algo"), - NullifierVerifierType: confutil.P("nullifier_verifier_type"), - NullifierPayloadType: confutil.P("nullifier_payload_type"), - }, - }) - require.NoError(t, err) - assert.Equal(t, []*components.NullifierUpsert{ - { - ID: nullifierBytes, - State: stateID, - }, - }, nullifiers) - -} - -func TestBuildNullifiersFail(t *testing.T) { - - ctx, mc, sd := newTestStateDistributor(t) - - keyMapping := &pldapi.KeyMappingAndVerifier{} - - mc.keyResolver.On("ResolveKey", "target", "nullifier_algo", "nullifier_verifier_type"). - Return(&pldapi.KeyMappingAndVerifier{}, nil) - - mc.keyManager.On("Sign", ctx, keyMapping, "nullifier_payload_type", []byte(`{"state":"data"}`)). - Return(nil, fmt.Errorf("pop")) - - stateID := tktypes.HexBytes(tktypes.RandBytes(32)) - _, err := sd.BuildNullifiers(ctx, []*components.StateDistributionWithData{ - { - ID: uuid.New().String(), - StateID: stateID.String(), - IdentityLocator: "target@node1", - StateDataJson: `{"state":"data"}`, - NullifierAlgorithm: confutil.P("nullifier_algo"), - NullifierVerifierType: confutil.P("nullifier_verifier_type"), - NullifierPayloadType: confutil.P("nullifier_payload_type"), - }, - }) - assert.Regexp(t, "PD012401.*pop", err) - -} - -func TestBuildNullifiersNotLocal(t *testing.T) { - - ctx, _, sd := newTestStateDistributor(t) - - stateID := tktypes.HexBytes(tktypes.RandHex(32)) - _, err := sd.BuildNullifiers(ctx, []*components.StateDistributionWithData{ - { - ID: uuid.New().String(), - StateID: stateID.String(), - IdentityLocator: "target", // missing node - StateDataJson: `{"state":"data"}`, - NullifierAlgorithm: confutil.P("nullifier_algo"), - NullifierVerifierType: confutil.P("nullifier_verifier_type"), - NullifierPayloadType: confutil.P("nullifier_payload_type"), - }, - }) - assert.Regexp(t, "PD012400", err) - -} diff --git a/core/go/internal/statedistribution/state_receiver.go b/core/go/internal/statedistribution/state_receiver.go deleted file mode 100644 index f460eb840..000000000 --- a/core/go/internal/statedistribution/state_receiver.go +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright © 2024 Kaleido, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the - * specific language governing permissions and limitations under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - */ - -package statedistribution - -import ( - "context" - - "github.com/kaleido-io/paladin/core/internal/components" - pb "github.com/kaleido-io/paladin/core/pkg/proto/engine" - "github.com/kaleido-io/paladin/toolkit/pkg/log" - "google.golang.org/protobuf/proto" -) - -func (sd *stateDistributer) sendStateAcknowledgement(ctx context.Context, domainName string, contractAddress string, stateId string, receivingParty string, distributingNode string, distributionID string) error { - log.L(ctx).Debugf("stateDistributer:sendStateAcknowledgement domainName=%s contractAddress=%s stateId=%s receivingParty=%s distributingNode=%s distributionID=%s", domainName, contractAddress, stateId, receivingParty, distributingNode, distributionID) - stateAcknowledgedEvent := &pb.StateAcknowledgedEvent{ - DomainName: domainName, - ContractAddress: contractAddress, - StateId: stateId, - Party: receivingParty, - DistributionId: distributionID, - } - stateAcknowledgedEventBytes, err := proto.Marshal(stateAcknowledgedEvent) - if err != nil { - log.L(ctx).Errorf("Error marshalling state acknowledgment event: %s", err) - return err - } - - err = sd.transportManager.Send(ctx, &components.TransportMessage{ - MessageType: "StateAcknowledgedEvent", - Payload: stateAcknowledgedEventBytes, - Node: distributingNode, - Component: components.PRIVATE_TX_MANAGER_DESTINATION, - ReplyTo: sd.localNodeName, - }) - if err != nil { - log.L(ctx).Errorf("Error sending state produced event: %s", err) - return err - } - - return nil -} diff --git a/core/go/internal/statedistribution/state_sender.go b/core/go/internal/statedistribution/state_sender.go deleted file mode 100644 index a420d4ac9..000000000 --- a/core/go/internal/statedistribution/state_sender.go +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright © 2024 Kaleido, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the - * specific language governing permissions and limitations under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - */ - -package statedistribution - -import ( - "context" - "time" - - "github.com/kaleido-io/paladin/core/internal/components" - pb "github.com/kaleido-io/paladin/core/pkg/proto/engine" - "github.com/kaleido-io/paladin/toolkit/pkg/log" - "github.com/kaleido-io/paladin/toolkit/pkg/tktypes" - "google.golang.org/protobuf/proto" -) - -func (sd *stateDistributer) DistributeStates(ctx context.Context, stateDistributions []*components.StateDistributionWithData) { - log.L(ctx).Debugf("stateDistributer:DistributeStates %d state distributions", len(stateDistributions)) - for _, stateDistribution := range stateDistributions { - sd.inputChan <- stateDistribution - } -} - -func (sd *stateDistributer) sendState(ctx context.Context, stateDistribution *components.StateDistributionWithData) { - log.L(ctx).Debugf("stateDistributer:sendState id=%s,domain=%s contractAddress=%s schemaId=%s stateId=%s identity=%s, nullifierAlgorithm=%v nullifierVerifierType=%v nullifierPayloadType=%v]", - stateDistribution.ID, - stateDistribution.Domain, - stateDistribution.ContractAddress, - stateDistribution.SchemaID, - stateDistribution.StateID, - stateDistribution.IdentityLocator, - stateDistribution.NullifierAlgorithm, - stateDistribution.NullifierVerifierType, - stateDistribution.NullifierPayloadType, - ) - - stateProducedEvent := &pb.StateProducedEvent{ - DistributionId: stateDistribution.ID, - DomainName: stateDistribution.Domain, - ContractAddress: stateDistribution.ContractAddress, - SchemaId: stateDistribution.SchemaID, - StateId: stateDistribution.StateID, - StateDataJson: stateDistribution.StateDataJson, - Party: stateDistribution.IdentityLocator, - NullifierAlgorithm: stateDistribution.NullifierAlgorithm, - NullifierVerifierType: stateDistribution.NullifierVerifierType, - NullifierPayloadType: stateDistribution.NullifierPayloadType, - } - stateProducedEventBytes, err := proto.Marshal(stateProducedEvent) - if err != nil { - log.L(ctx).Errorf("Error marshalling delegate transaction message: %s", err) - return - } - - targetNode, err := tktypes.PrivateIdentityLocator(stateDistribution.IdentityLocator).Node(ctx, false) - if err != nil { - log.L(ctx).Errorf("Error getting node for party %s", stateDistribution.IdentityLocator) - return - } - - err = sd.transportManager.Send(ctx, &components.TransportMessage{ - MessageType: "StateProducedEvent", - Payload: stateProducedEventBytes, - Node: targetNode, - Component: components.PRIVATE_TX_MANAGER_DESTINATION, - ReplyTo: sd.localNodeName, - }) - if err != nil { - log.L(ctx).Errorf("Error sending state produced event: %s", err) - return - } - - go func() { - time.Sleep(RETRY_TIMEOUT) - sd.retryChan <- stateDistribution.ID - }() - -} diff --git a/core/go/internal/statedistribution/transport_client.go b/core/go/internal/statedistribution/transport_client.go deleted file mode 100644 index 41fbdd268..000000000 --- a/core/go/internal/statedistribution/transport_client.go +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright © 2024 Kaleido, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations under the License. - * - * SPDX-License-Identifier: Apache-2.0 - */ - -package statedistribution - -import ( - "context" - - "github.com/kaleido-io/paladin/core/internal/components" - pb "github.com/kaleido-io/paladin/core/pkg/proto/engine" - "github.com/kaleido-io/paladin/toolkit/pkg/log" - "github.com/kaleido-io/paladin/toolkit/pkg/tktypes" - "google.golang.org/protobuf/proto" -) - -func (sd *stateDistributer) HandleStateProducedEvent(ctx context.Context, stateProducedEvent *pb.StateProducedEvent, distributingNode string) { - log.L(ctx).Debugf("stateDistributer:handleStateProducedEvent") - - var err error - s := &components.StateDistributionWithData{ - ID: stateProducedEvent.DistributionId, - StateID: stateProducedEvent.StateId, - IdentityLocator: stateProducedEvent.Party, - Domain: stateProducedEvent.DomainName, - ContractAddress: stateProducedEvent.ContractAddress, - SchemaID: stateProducedEvent.SchemaId, - StateDataJson: stateProducedEvent.StateDataJson, - NullifierAlgorithm: stateProducedEvent.NullifierAlgorithm, - NullifierVerifierType: stateProducedEvent.NullifierVerifierType, - NullifierPayloadType: stateProducedEvent.NullifierPayloadType, - } - - // We need to build any nullifiers that are required, before we dispatch to persistence - var nullifier *components.NullifierUpsert - if stateProducedEvent.NullifierAlgorithm != nil && stateProducedEvent.NullifierVerifierType != nil && stateProducedEvent.NullifierPayloadType != nil { - err = sd.withKeyResolutionContext(ctx, func(krc components.KeyResolutionContextLazyDB) (err error) { - nullifier, err = sd.buildNullifier(ctx, krc, s) - return err - }) - } - - if err == nil { - err = sd.receivedStateWriter.QueueAndWait(ctx, - s.Domain, - *tktypes.MustEthAddress(s.ContractAddress), - tktypes.MustParseBytes32(s.SchemaID), - tktypes.RawJSON(s.StateDataJson), - nullifier, - ) - } - if err != nil { - log.L(ctx).Errorf("Error writing state: %s", err) - //don't send the acknowledgement, we rely on the sender to retry - return - } - - // No error means either this is the first time we have received this state or we already have it an onConflict ignore means we idempotently accept it - // If the latter, then the sender probably didn't get our previous acknowledgement so either way, we send an acknowledgement - - err = sd.sendStateAcknowledgement( - ctx, - stateProducedEvent.DomainName, - stateProducedEvent.ContractAddress, - stateProducedEvent.StateId, - stateProducedEvent.Party, - distributingNode, - stateProducedEvent.DistributionId) - if err != nil { - log.L(ctx).Errorf("Error sending state acknowledgement: %s", err) - //not much more we can do here. 
The sender will inevitably retry and we will hopefully send the ack next time - } -} - -func (sd *stateDistributer) HandleStateAcknowledgedEvent(ctx context.Context, messagePayload []byte) { - log.L(ctx).Debugf("stateDistributer:handleStateAcknowledgedEvent") - stateAcknowledgedEvent := &pb.StateAcknowledgedEvent{} - err := proto.Unmarshal(messagePayload, stateAcknowledgedEvent) - if err != nil { - log.L(ctx).Errorf("Failed to unmarshal StateAcknowledgedEvent: %s", err) - return - } - sd.acknowledgementWriter.Queue(ctx, stateAcknowledgedEvent.DistributionId) - // no need to wait for the flush to complete, we can just stop the in memory loop from retrying - // worst case scenario, we crash before this is written to the DB, we do some redundant retries after a restart - // but waiting for the flush here is not going to prevent that - sd.acknowledgedChan <- stateAcknowledgedEvent.DistributionId - -} diff --git a/core/go/internal/transportmgr/manager.go b/core/go/internal/transportmgr/manager.go index b6ed42348..043a38a1f 100644 --- a/core/go/internal/transportmgr/manager.go +++ b/core/go/internal/transportmgr/manager.go @@ -31,6 +31,7 @@ import ( "github.com/kaleido-io/paladin/toolkit/pkg/log" "github.com/kaleido-io/paladin/toolkit/pkg/plugintk" + "github.com/kaleido-io/paladin/toolkit/pkg/prototk" "github.com/kaleido-io/paladin/toolkit/pkg/retry" "github.com/kaleido-io/paladin/toolkit/pkg/rpcserver" "github.com/kaleido-io/paladin/toolkit/pkg/tktypes" @@ -50,7 +51,7 @@ type transportManager struct { transportsByID map[uuid.UUID]*transport transportsByName map[string]*transport - destinations map[string]components.TransportClient + components map[prototk.PaladinMsg_Component]components.TransportClient destinationsFixed bool destinationsMux sync.RWMutex @@ -71,7 +72,7 @@ func NewTransportManager(bgCtx context.Context, conf *pldconf.TransportManagerCo localNodeName: conf.NodeName, transportsByID: make(map[uuid.UUID]*transport), transportsByName: make(map[string]*transport), - destinations: make(map[string]components.TransportClient), + components: make(map[prototk.PaladinMsg_Component]components.TransportClient), senderBufferLen: confutil.IntMin(conf.SendQueueLen, 0, *pldconf.TransportManagerDefaults.SendQueueLen), reliableMessageResend: confutil.DurationMin(conf.ReliableMessageResend, 100*time.Millisecond, *pldconf.TransportManagerDefaults.ReliableMessageResend), sendShortRetry: retry.NewRetryLimited(&conf.SendRetry, &pldconf.TransportManagerDefaults.SendRetry), @@ -126,11 +127,11 @@ func (tm *transportManager) RegisterClient(ctx context.Context, client component if tm.destinationsFixed { return i18n.NewError(tm.bgCtx, msgs.MsgTransportClientRegisterAfterStartup, client.Destination()) } - if _, found := tm.destinations[client.Destination()]; found { + if _, found := tm.components[client.Destination()]; found { log.L(ctx).Errorf("Client already registered for destination %s", client.Destination()) return i18n.NewError(tm.bgCtx, msgs.MsgTransportClientAlreadyRegistered, client.Destination()) } - tm.destinations[client.Destination()] = client + tm.components[client.Destination()] = client return nil } @@ -214,34 +215,39 @@ func (tm *transportManager) LocalNodeName() string { } // See docs in components package -func (tm *transportManager) Send(ctx context.Context, msg *components.TransportMessage) error { - - msg.MessageID = uuid.New() +func (tm *transportManager) Send(ctx context.Context, send *components.FireAndForgetMessageSend) error { // Check the message is valid - if len(msg.MessageType) == 
0 || - len(msg.Payload) == 0 { - log.L(ctx).Errorf("Invalid message send request %+v", msg) + if len(send.MessageType) == 0 || + len(send.Payload) == 0 { + log.L(ctx).Errorf("Invalid message send request %+v", send) return i18n.NewError(ctx, msgs.MsgTransportInvalidMessage) } - if msg.ReplyTo == "" { - msg.ReplyTo = tm.localNodeName - } - // Use or establish a peer connection for the send - peer, err := tm.getPeer(ctx, msg.Node) + peer, err := tm.getPeer(ctx, send.Node) if err != nil { return err } + msg := &prototk.PaladinMsg{ + MessageId: uuid.NewString(), + MessageType: send.MessageType, + Component: send.Component, + Payload: send.Payload, + } + if send.CorrelationID != nil { + cidStr := send.CorrelationID.String() + msg.CorrelationId = &cidStr + } + // Push the send to the peer - this is a best effort interaction. // There is some retry in the Paladin layer, and some transports provide resilience. // However, the send is at-most-once, and the higher level message protocols that // use this "send" must be fault tolerant to message loss. select { case peer.sendQueue <- msg: - log.L(ctx).Debugf("queued %s message %s (cid=%v) to %s", msg.MessageType, msg.MessageID, msg.CorrelationID, peer.name) + log.L(ctx).Debugf("queued %s message %s (cid=%v) to %s", msg.MessageType, msg.MessageId, send.CorrelationID, peer.name) return nil case <-ctx.Done(): return i18n.NewError(ctx, msgs.MsgContextCanceled) diff --git a/core/go/internal/transportmgr/peer.go b/core/go/internal/transportmgr/peer.go index 5009feb12..4055e72a5 100644 --- a/core/go/internal/transportmgr/peer.go +++ b/core/go/internal/transportmgr/peer.go @@ -23,7 +23,6 @@ import ( "time" "github.com/hyperledger/firefly-common/pkg/i18n" - "github.com/kaleido-io/paladin/config/pkg/confutil" "github.com/kaleido-io/paladin/core/internal/components" "github.com/kaleido-io/paladin/core/internal/msgs" "github.com/kaleido-io/paladin/toolkit/pkg/log" @@ -42,7 +41,7 @@ type peer struct { peerInfo map[string]any // opaque JSON object from the transport persistedMsgsAvailable chan struct{} - sendQueue chan *components.TransportMessage + sendQueue chan *prototk.PaladinMsg // Send loop state (no lock as only used on the loop) lastFullScan time.Time @@ -102,7 +101,7 @@ func (tm *transportManager) getPeer(ctx context.Context, nodeName string) (*peer tm: tm, name: nodeName, persistedMsgsAvailable: make(chan struct{}, 1), - sendQueue: make(chan *components.TransportMessage, tm.senderBufferLen), + sendQueue: make(chan *prototk.PaladinMsg, tm.senderBufferLen), done: make(chan struct{}), } p.ctx, p.cancelCtx = context.WithCancel( @@ -160,38 +159,19 @@ func (p *peer) notifyPersistedMsgAvailable() { } } -func (p *peer) mapMsg(msg *components.TransportMessage) *prototk.Message { - // Convert the message to the protobuf transport payload - var correlID *string - if msg.CorrelationID != nil { - correlID = confutil.P(msg.CorrelationID.String()) - } - return &prototk.Message{ - MessageType: msg.MessageType, - MessageId: msg.MessageID.String(), - CorrelationId: correlID, - Component: msg.Component, - Node: msg.Node, - ReplyTo: msg.ReplyTo, - Payload: msg.Payload, - } -} - -func (p *peer) stateDistributionMsg(rm *components.ReliableMessage, targetNode string, sd *components.StateDistributionWithData) *prototk.Message { +func (p *peer) stateDistributionMsg(rm *components.ReliableMessage, targetNode string, sd *components.StateDistributionWithData) *prototk.PaladinMsg { payload, _ := json.Marshal(sd) - return &prototk.Message{ - + return &prototk.PaladinMsg{ + MessageId: 
rm.ID.String(), MessageType: "StateProducedEvent", Payload: payload, - Node: targetNode, - Component: components.PRIVATE_TX_MANAGER_DESTINATION, - ReplyTo: p.tm.localNodeName, + Component: prototk.PaladinMsg_TRANSACTION_ENGINE, } } -func (p *peer) send(msg *prototk.Message) error { +func (p *peer) send(msg *prototk.PaladinMsg) error { return p.tm.sendShortRetry.Do(p.ctx, func(attempt int) (retryable bool, err error) { - return true, p.transport.send(p.ctx, msg) + return true, p.transport.send(p.ctx, p.name, msg) }) } @@ -277,7 +257,7 @@ func (p *peer) reliableMessageScan() error { return nil } -func (p *peer) buildStateDistributionMsg(rm *components.ReliableMessage) (*prototk.Message, error, error) { +func (p *peer) buildStateDistributionMsg(rm *components.ReliableMessage) (*prototk.PaladinMsg, error, error) { // Validate the message first (not retryable) var sd components.StateDistributionWithData @@ -311,10 +291,10 @@ func (p *peer) buildStateDistributionMsg(rm *components.ReliableMessage) (*proto func (p *peer) processReliableMsgPage(page []*components.ReliableMessage) (err error) { // Build the messages - msgsToSend := make([]*prototk.Message, 0, len(page)) + msgsToSend := make([]*prototk.PaladinMsg, 0, len(page)) var errorAcks []*components.ReliableMessageAck for _, rm := range page { - var msg *prototk.Message + var msg *prototk.PaladinMsg var errorAck error switch rm.MessageType.V() { case components.RMTState: diff --git a/core/go/internal/transportmgr/transport.go b/core/go/internal/transportmgr/transport.go index 01e83a64c..3bfbee476 100644 --- a/core/go/internal/transportmgr/transport.go +++ b/core/go/internal/transportmgr/transport.go @@ -29,6 +29,7 @@ import ( "github.com/kaleido-io/paladin/toolkit/pkg/log" "github.com/kaleido-io/paladin/toolkit/pkg/prototk" "github.com/kaleido-io/paladin/toolkit/pkg/retry" + "github.com/kaleido-io/paladin/toolkit/pkg/tktypes" "google.golang.org/protobuf/encoding/protojson" "google.golang.org/protobuf/proto" ) @@ -96,12 +97,15 @@ func (t *transport) checkInit(ctx context.Context) error { return nil } -func (t *transport) send(ctx context.Context, msg *prototk.Message) error { +func (t *transport) send(ctx context.Context, nodeName string, msg *prototk.PaladinMsg) error { if err := t.checkInit(ctx); err != nil { return err } - _, err := t.api.SendMessage(ctx, &prototk.SendMessageRequest{Message: msg}) + _, err := t.api.SendMessage(ctx, &prototk.SendMessageRequest{ + Node: nodeName, + Message: msg, + }) if err != nil { return err } @@ -109,7 +113,7 @@ func (t *transport) send(ctx context.Context, msg *prototk.Message) error { if msg.CorrelationId != nil { correlIDStr = *msg.CorrelationId } - log.L(ctx).Debugf("transport %s message sent id=%s (cid=%s) node=%s component=%s type=%s", t.name, msg.MessageId, correlIDStr, msg.Node, msg.Component, msg.MessageType) + log.L(ctx).Debugf("transport %s message sent id=%s (cid=%s) node=%s component=%s type=%s", t.name, msg.MessageId, correlIDStr, nodeName, msg.Component, msg.MessageType) if log.IsTraceEnabled() { log.L(ctx).Tracef("transport %s message sent: %s", t.name, protoToJSON(msg)) } @@ -136,59 +140,36 @@ func (t *transport) ReceiveMessage(ctx context.Context, req *prototk.ReceiveMess return nil, i18n.NewError(ctx, msgs.MsgTransportInvalidMessage) } - if msg.Node != t.tm.localNodeName { - return nil, i18n.NewError(ctx, msgs.MsgTransportInvalidNodeReceived, msg.Node, t.tm.localNodeName) - } - msgID, err := uuid.Parse(msg.MessageId) if err != nil { log.L(ctx).Errorf("Invalid messageId from transport: 
%s", protoToJSON(msg)) return nil, i18n.NewError(ctx, msgs.MsgTransportInvalidMessage) } - var pCorrelID *uuid.UUID - var correlIDStr string - if msg.CorrelationId != nil { - correlIDStr = *msg.CorrelationId - correlID, err := uuid.Parse(correlIDStr) - if err != nil { - log.L(ctx).Errorf("Invalid correlationId from transport: %s", protoToJSON(msg)) - return nil, i18n.NewError(ctx, msgs.MsgTransportInvalidMessage) - } - pCorrelID = &correlID - } - log.L(ctx).Debugf("transport %s message received id=%s (cid=%s)", t.name, msgID, correlIDStr) + log.L(ctx).Debugf("transport %s message received id=%s (cid=%s)", t.name, msgID, tktypes.StrOrEmpty(msg.CorrelationId)) if log.IsTraceEnabled() { log.L(ctx).Tracef("transport %s message received: %s", t.name, protoToJSON(msg)) } - if err = t.deliverMessage(ctx, msg.Component, &components.TransportMessage{ - MessageID: msgID, - MessageType: msg.MessageType, - Component: msg.Component, - CorrelationID: pCorrelID, - Node: msg.Node, - ReplyTo: msg.ReplyTo, - Payload: msg.Payload, - }); err != nil { + if err = t.deliverMessage(ctx, msg.Component, msg); err != nil { return nil, err } return &prototk.ReceiveMessageResponse{}, nil } -func (t *transport) deliverMessage(ctx context.Context, destIdentity string, msg *components.TransportMessage) error { +func (t *transport) deliverMessage(ctx context.Context, component prototk.PaladinMsg_Component, msg *prototk.PaladinMsg) error { t.tm.destinationsMux.RLock() defer t.tm.destinationsMux.RUnlock() // TODO: Reconcile why we're using the identity as the component routing location - Broadhurst/Hosie discussion required - receiver, found := t.tm.destinations[destIdentity] + receiver, found := t.tm.components[component] if !found { - log.L(ctx).Errorf("Component not found: %s", msg.Component) - return i18n.NewError(ctx, msgs.MsgTransportDestinationNotFound, msg.Component) + log.L(ctx).Errorf("Component not found: %s", component) + return i18n.NewError(ctx, msgs.MsgTransportDestinationNotFound, component.String()) } - receiver.ReceiveTransportMessage(ctx, msg) + receiver.HandlePaladinMsg(ctx, msg) return nil } diff --git a/core/go/internal/transportmgr/transport_test.go b/core/go/internal/transportmgr/transport_test.go index 62d345340..367b02810 100644 --- a/core/go/internal/transportmgr/transport_test.go +++ b/core/go/internal/transportmgr/transport_test.go @@ -109,11 +109,8 @@ func TestDoubleRegisterReplaces(t *testing.T) { } -func testMessage() *components.TransportMessage { - return &components.TransportMessage{ - Node: "node2", - Component: "someComponent", - ReplyTo: "node1", +func testMessage() *components.FireAndForgetMessageSend { + return &components.FireAndForgetMessageSend{ CorrelationID: confutil.P(uuid.New()), MessageType: "myMessageType", Payload: []byte("something"), @@ -135,19 +132,16 @@ func TestSendMessage(t *testing.T) { message := testMessage() - sentMessages := make(chan *prototk.Message, 1) + sentMessages := make(chan *prototk.PaladinMsg, 1) tp.Functions.SendMessage = func(ctx context.Context, req *prototk.SendMessageRequest) (*prototk.SendMessageResponse, error) { sent := req.Message assert.NotEmpty(t, sent.MessageId) assert.Equal(t, message.CorrelationID.String(), *sent.CorrelationId) - assert.Equal(t, message.Node, sent.Node) - assert.Equal(t, message.Component, sent.Component) - assert.Equal(t, message.ReplyTo, sent.ReplyTo) assert.Equal(t, message.Payload, sent.Payload) // ... 
if we didn't have a connection established we'd expect to come back to request the details gtdr, err := tp.t.GetTransportDetails(ctx, &prototk.GetTransportDetailsRequest{ - Node: message.Node, + Node: "node2", }) require.NoError(t, err) assert.NotEmpty(t, gtdr.TransportDetails) @@ -162,36 +156,6 @@ func TestSendMessage(t *testing.T) { <-sentMessages } -func TestSendMessageReplyToDefaultsToLocalNode(t *testing.T) { - ctx, tm, tp, done := newTestTransport(t, func(mc *mockComponents) components.TransportClient { - mc.registryManager.On("GetNodeTransports", mock.Anything, "node2").Return([]*components.RegistryNodeTransportEntry{ - { - Node: "node2", - Transport: "test1", - Details: `{"likely":"json stuff"}`, - }, - }, nil) - return nil - }) - defer done() - - message := testMessage() - message.ReplyTo = "" - - sentMessages := make(chan *prototk.Message, 1) - tp.Functions.SendMessage = func(ctx context.Context, req *prototk.SendMessageRequest) (*prototk.SendMessageResponse, error) { - sent := req.Message - assert.Equal(t, message.ReplyTo, sent.ReplyTo) - sentMessages <- sent - return nil, nil - } - - err := tm.Send(ctx, message) - require.NoError(t, err) - - <-sentMessages -} - func TestSendMessageNotInit(t *testing.T) { ctx, tm, tp, done := newTestTransport(t, func(mc *mockComponents) components.TransportClient { mc.registryManager.On("GetNodeTransports", mock.Anything, "node2").Return([]*components.RegistryNodeTransportEntry{ @@ -288,12 +252,12 @@ func TestSendMessageDestWrong(t *testing.T) { message := testMessage() - message.Component = "some_component" + message.Component = prototk.PaladinMsg_TRANSACTION_ENGINE message.Node = "" err := tm.Send(ctx, message) assert.Regexp(t, "PD012007", err) - message.Component = "this_is_local" + message.Component = prototk.PaladinMsg_TRANSACTION_ENGINE message.Node = "node1" err = tm.Send(ctx, message) assert.Regexp(t, "PD012007", err) @@ -304,26 +268,26 @@ func TestSendInvalidMessageNoPayload(t *testing.T) { ctx, tm, _, done := newTestTransport(t) defer done() - message := &components.TransportMessage{} + message := &components.FireAndForgetMessageSend{} err := tm.Send(ctx, message) assert.Regexp(t, "PD012000", err) } func TestReceiveMessage(t *testing.T) { - receivedMessages := make(chan *components.TransportMessage, 1) + receivedMessages := make(chan *prototk.PaladinMsg, 1) ctx, _, tp, done := newTestTransport(t, func(mc *mockComponents) components.TransportClient { receivingClient := componentmocks.NewTransportClient(t) receivingClient.On("Destination").Return("receivingClient1") - receivingClient.On("ReceiveTransportMessage", mock.Anything, mock.Anything).Return().Run(func(args mock.Arguments) { - receivedMessages <- args[1].(*components.TransportMessage) + receivingClient.On("HandlePaladinMsg", mock.Anything, mock.Anything).Return().Run(func(args mock.Arguments) { + receivedMessages <- args[1].(*prototk.PaladinMsg) }) return receivingClient }) defer done() - msg := &prototk.Message{ + msg := &prototk.PaladinMsg{ MessageId: uuid.NewString(), CorrelationId: confutil.P(uuid.NewString()), Node: "node1", @@ -346,7 +310,7 @@ func TestReceiveMessageNoReceiver(t *testing.T) { ctx, _, tp, done := newTestTransport(t) defer done() - msg := &prototk.Message{ + msg := &prototk.PaladinMsg{ MessageId: uuid.NewString(), CorrelationId: confutil.P(uuid.NewString()), Node: "node1", @@ -366,7 +330,7 @@ func TestReceiveMessageInvalidDestination(t *testing.T) { ctx, _, tp, done := newTestTransport(t) defer done() - msg := &prototk.Message{ + msg := &prototk.PaladinMsg{ 
MessageId: uuid.NewString(), CorrelationId: confutil.P(uuid.NewString()), Component: "___", @@ -388,7 +352,7 @@ func TestReceiveMessageNotInit(t *testing.T) { tp.t.initialized.Store(false) - msg := &prototk.Message{ + msg := &prototk.PaladinMsg{ MessageId: uuid.NewString(), CorrelationId: confutil.P(uuid.NewString()), Component: "to", @@ -407,7 +371,7 @@ func TestReceiveMessageNoPayload(t *testing.T) { ctx, _, tp, done := newTestTransport(t) defer done() - msg := &prototk.Message{} + msg := &prototk.PaladinMsg{} _, err := tp.t.ReceiveMessage(ctx, &prototk.ReceiveMessageRequest{ Message: msg, }) @@ -418,7 +382,7 @@ func TestReceiveMessageWrongNode(t *testing.T) { ctx, _, tp, done := newTestTransport(t) defer done() - msg := &prototk.Message{ + msg := &prototk.PaladinMsg{ Component: "to", Node: "node2", ReplyTo: "node2", @@ -435,7 +399,7 @@ func TestReceiveMessageBadDestination(t *testing.T) { ctx, _, tp, done := newTestTransport(t) defer done() - msg := &prototk.Message{ + msg := &prototk.PaladinMsg{ MessageId: uuid.NewString(), Component: "to", Node: "node2", @@ -453,7 +417,7 @@ func TestReceiveMessageBadMsgID(t *testing.T) { ctx, _, tp, done := newTestTransport(t) defer done() - msg := &prototk.Message{ + msg := &prototk.PaladinMsg{ Component: "to", Node: "node1", ReplyTo: "node2", @@ -470,7 +434,7 @@ func TestReceiveMessageBadCorrelID(t *testing.T) { ctx, _, tp, done := newTestTransport(t) defer done() - msg := &prototk.Message{ + msg := &prototk.PaladinMsg{ MessageId: uuid.NewString(), CorrelationId: confutil.P("wrong"), Component: "to", diff --git a/core/go/pkg/testbed/testbed.go b/core/go/pkg/testbed/testbed.go index 8d5fc9b6f..479a1b9f2 100644 --- a/core/go/pkg/testbed/testbed.go +++ b/core/go/pkg/testbed/testbed.go @@ -160,7 +160,7 @@ func unitTestComponentManagerStart(ctx context.Context, conf *pldconf.PaladinCon return cm, err } -func (tb *testbed) ReceiveTransportMessage(context.Context, *components.TransportMessage) { +func (tb *testbed) HandlePaladinMsg(context.Context, *components.TransportMessage) { // no-op } diff --git a/core/go/pkg/testbed/testbed_engine_stubs.go b/core/go/pkg/testbed/testbed_engine_stubs.go index e79a550d2..b7541028f 100644 --- a/core/go/pkg/testbed/testbed_engine_stubs.go +++ b/core/go/pkg/testbed/testbed_engine_stubs.go @@ -22,9 +22,7 @@ import ( "time" "github.com/hyperledger/firefly-signer/pkg/abi" - "github.com/kaleido-io/paladin/config/pkg/pldconf" "github.com/kaleido-io/paladin/core/internal/components" - "github.com/kaleido-io/paladin/core/internal/statedistribution" "github.com/kaleido-io/paladin/toolkit/pkg/log" "github.com/kaleido-io/paladin/toolkit/pkg/pldapi" "github.com/kaleido-io/paladin/toolkit/pkg/prototk" @@ -152,16 +150,7 @@ func (tb *testbed) writeNullifiersToContext(dCtx components.DomainContext, tx *c return fmt.Errorf("testbed does not support states for remote nodes") } - // We construct a state distributor each time, but DO NOT START IT. 
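// The replacement below moves nullifier construction behind the
// PrivateTxManager component. A minimal sketch of the method shape, inferred
// from the call sites in this series (the deleted stateDistributer tests
// above and the PrivateTxManager call below) rather than quoted from the
// component interface itself:
//
//   BuildNullifiers(ctx context.Context,
//       distributions []*components.StateDistributionWithData,
//   ) ([]*components.NullifierUpsert, error)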
- // TODO: State distributor needs to become a first class component with significant lifecycle activities - sd := statedistribution.NewStateDistributer(tb.ctx, - tb.c.TransportManager(), - tb.c.StateManager(), - tb.c.KeyManager(), - tb.c.Persistence(), - &pldconf.DistributerConfig{}, - ) - nullifiers, err := sd.BuildNullifiers(tb.ctx, distributions.Local) + nullifiers, err := tb.c.PrivateTxManager().BuildNullifiers(tb.ctx, distributions.Local) if err != nil { return err } diff --git a/toolkit/go/pkg/tktypes/rawjson_test.go b/toolkit/go/pkg/tktypes/rawjson_test.go index c1c8d5aee..e23e3966e 100644 --- a/toolkit/go/pkg/tktypes/rawjson_test.go +++ b/toolkit/go/pkg/tktypes/rawjson_test.go @@ -89,7 +89,7 @@ func TestRawJSON(t *testing.T) { } func TestProtoToJSON(t *testing.T) { - m := &prototk.Message{ + m := &prototk.PaladinMsg{ MessageId: "3d472892-8c5c-4290-910d-beeec5858e47", } assert.JSONEq(t, `{"messageId":"3d472892-8c5c-4290-910d-beeec5858e47"}`, ProtoToJSON(m).String()) diff --git a/toolkit/go/pkg/tktypes/string_validation.go b/toolkit/go/pkg/tktypes/string_validation.go index 1c7d2f588..45733f895 100644 --- a/toolkit/go/pkg/tktypes/string_validation.go +++ b/toolkit/go/pkg/tktypes/string_validation.go @@ -44,3 +44,10 @@ func ValidateSafeCharsStartEndAlphaNum(ctx context.Context, val string, maxLen i } return nil } + +func StrOrEmpty(s *string) string { + if s == nil { + return "" + } + return *s +} diff --git a/toolkit/go/pkg/tktypes/string_validation_test.go b/toolkit/go/pkg/tktypes/string_validation_test.go index 329e41995..8dcfc7b1f 100644 --- a/toolkit/go/pkg/tktypes/string_validation_test.go +++ b/toolkit/go/pkg/tktypes/string_validation_test.go @@ -51,3 +51,11 @@ func TestValidate64SafeCharsStartEndAlphaNum(t *testing.T) { err = ValidateSafeCharsStartEndAlphaNum(context.Background(), "not_Àll_ascii", DefaultNameMaxLen, "name") assert.Regexp(t, "PD020005.*name", err) } + +func TestStrOrEmpty(t *testing.T) { + + assert.Equal(t, "", StrOrEmpty(nil)) + tStr := "test" + assert.Equal(t, "test", StrOrEmpty(&tStr)) + +} diff --git a/toolkit/proto/protos/from_transport.proto b/toolkit/proto/protos/from_transport.proto index aaf799cd3..a8828a3a2 100644 --- a/toolkit/proto/protos/from_transport.proto +++ b/toolkit/proto/protos/from_transport.proto @@ -20,8 +20,7 @@ package io.kaleido.paladin.toolkit; import "to_transport.proto"; message ReceiveMessageRequest { - string node = 1; - PaladinMsg message = 2; + PaladinMsg message = 1; } message ReceiveMessageResponse { diff --git a/transports/grpc/internal/grpctransport/grpc_transport.go b/transports/grpc/internal/grpctransport/grpc_transport.go index 368f2b823..4c4b1c93e 100644 --- a/transports/grpc/internal/grpctransport/grpc_transport.go +++ b/transports/grpc/internal/grpctransport/grpc_transport.go @@ -190,7 +190,6 @@ func (t *grpcTransport) ConnectSendStream(stream grpc.ClientStreamingServer[prot // Deliver it to Paladin _, err = t.callbacks.ReceiveMessage(ctx, &prototk.ReceiveMessageRequest{ - Node: ai.verifiedNodeName, Message: &prototk.PaladinMsg{ MessageId: msg.MessageId, CorrelationId: msg.CorrelationId, From 6ea9915f1d3ff4598dbaa46181809eaada568898 Mon Sep 17 00:00:00 2001 From: Peter Broadhurst Date: Sun, 29 Dec 2024 15:29:44 -0500 Subject: [PATCH 07/41] Kick off test reconcile for updated transport manager Signed-off-by: Peter Broadhurst --- core/go/internal/msgs/en_errors.go | 1 + core/go/internal/transportmgr/manager.go | 1 + core/go/internal/transportmgr/manager_test.go | 39 +++++++--- core/go/internal/transportmgr/peer.go | 17 
++-- .../internal/transportmgr/transport_test.go | 77 ++++++++----------- .../transportmgr/transportmgr_rpc_test.go | 2 +- toolkit/proto/protos/from_transport.proto | 3 +- .../internal/grpctransport/grpc_transport.go | 1 + .../grpctransport/grpc_transport_test.go | 1 + 9 files changed, 82 insertions(+), 60 deletions(-) diff --git a/core/go/internal/msgs/en_errors.go b/core/go/internal/msgs/en_errors.go index 843fe24eb..acf3a3bb4 100644 --- a/core/go/internal/msgs/en_errors.go +++ b/core/go/internal/msgs/en_errors.go @@ -368,6 +368,7 @@ var ( MsgTransportClientRegisterAfterStartup = ffe("PD012012", "Client '%s' attempted registration after startup") MsgTransportUnsupportedReliableMsg = ffe("PD012013", "Unsupported reliable message type '%s'") MsgTransportStateNotAvailableLocally = ffe("PD012014", "State not available locally: domain=%s,contract=%s,id=%s") + MsgTransportInvalidPeerInfo = ffe("PD012015", "Invalid peer info JSON returned by plugin") // RegistryManager module PD0121XX MsgRegistryNodeEntiresNotFound = ffe("PD012100", "No entries found for node '%s'") diff --git a/core/go/internal/transportmgr/manager.go b/core/go/internal/transportmgr/manager.go index 043a38a1f..060c6730d 100644 --- a/core/go/internal/transportmgr/manager.go +++ b/core/go/internal/transportmgr/manager.go @@ -73,6 +73,7 @@ func NewTransportManager(bgCtx context.Context, conf *pldconf.TransportManagerCo transportsByID: make(map[uuid.UUID]*transport), transportsByName: make(map[string]*transport), components: make(map[prototk.PaladinMsg_Component]components.TransportClient), + peers: make(map[string]*peer), senderBufferLen: confutil.IntMin(conf.SendQueueLen, 0, *pldconf.TransportManagerDefaults.SendQueueLen), reliableMessageResend: confutil.DurationMin(conf.ReliableMessageResend, 100*time.Millisecond, *pldconf.TransportManagerDefaults.ReliableMessageResend), sendShortRetry: retry.NewRetryLimited(&conf.SendRetry, &pldconf.TransportManagerDefaults.SendRetry), diff --git a/core/go/internal/transportmgr/manager_test.go b/core/go/internal/transportmgr/manager_test.go index ad6c78540..6c1a80714 100644 --- a/core/go/internal/transportmgr/manager_test.go +++ b/core/go/internal/transportmgr/manager_test.go @@ -24,6 +24,8 @@ import ( "github.com/kaleido-io/paladin/config/pkg/pldconf" "github.com/kaleido-io/paladin/core/internal/components" "github.com/kaleido-io/paladin/core/mocks/componentmocks" + "github.com/kaleido-io/paladin/core/pkg/persistence" + "github.com/kaleido-io/paladin/core/pkg/persistence/mockpersistence" "github.com/sirupsen/logrus" "github.com/kaleido-io/paladin/toolkit/pkg/plugintk" @@ -35,22 +37,39 @@ import ( type mockComponents struct { c *componentmocks.AllComponents + db *mockpersistence.SQLMockProvider + p persistence.Persistence registryManager *componentmocks.RegistryManager + stateManager *componentmocks.StateManager } -func newMockComponents(t *testing.T) *mockComponents { +func newMockComponents(t *testing.T, realDB bool) *mockComponents { mc := &mockComponents{c: componentmocks.NewAllComponents(t)} mc.registryManager = componentmocks.NewRegistryManager(t) + mc.stateManager = componentmocks.NewStateManager(t) + if realDB { + p, cleanup, err := persistence.NewUnitTestPersistence(context.Background(), "transportmgr") + require.NoError(t, err) + t.Cleanup(cleanup) + mc.p = p + } else { + mdb, err := mockpersistence.NewSQLMockProvider() + require.NoError(t, err) + mc.db = mdb + mc.p = mdb.P + } + mc.c.On("Persistence").Return(mc.p).Maybe() mc.c.On("RegistryManager").Return(mc.registryManager).Maybe() 
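// The new realDB toggle lets a test opt into a real migrated database via
// persistence.NewUnitTestPersistence, while sqlmock stays the default so most
// tests remain fast and hermetic. A usage sketch, assuming the helper
// signatures introduced in this patch:
//
//   ctx, tm, mc, done := newTestTransportManager(t, true /* realDB */,
//       &pldconf.TransportManagerConfig{NodeName: "node1"})
//   defer done()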
+ mc.c.On("StateManager").Return(mc.stateManager).Maybe() return mc } -func newTestTransportManager(t *testing.T, conf *pldconf.TransportManagerConfig, extraSetup ...func(mc *mockComponents) components.TransportClient) (context.Context, *transportManager, *mockComponents, func()) { +func newTestTransportManager(t *testing.T, realDB bool, conf *pldconf.TransportManagerConfig, extraSetup ...func(mc *mockComponents) components.TransportClient) (context.Context, *transportManager, *mockComponents, func()) { ctx, cancelCtx := context.WithCancel(context.Background()) oldLevel := logrus.GetLevel() logrus.SetLevel(logrus.TraceLevel) - mc := newMockComponents(t) + mc := newMockComponents(t, realDB) var clients []components.TransportClient for _, fn := range extraSetup { client := fn(mc) @@ -88,12 +107,12 @@ func newTestTransportManager(t *testing.T, conf *pldconf.TransportManagerConfig, func TestMissingName(t *testing.T) { tm := NewTransportManager(context.Background(), &pldconf.TransportManagerConfig{}) - _, err := tm.PreInit(newMockComponents(t).c) + _, err := tm.PreInit(newMockComponents(t, false).c) assert.Regexp(t, "PD012002", err) } func TestConfiguredTransports(t *testing.T) { - _, dm, _, done := newTestTransportManager(t, &pldconf.TransportManagerConfig{ + _, dm, _, done := newTestTransportManager(t, false, &pldconf.TransportManagerConfig{ NodeName: "node1", Transports: map[string]*pldconf.TransportConfig{ "test1": { @@ -115,7 +134,7 @@ func TestConfiguredTransports(t *testing.T) { } func TestTransportRegisteredNotFound(t *testing.T) { - _, dm, _, done := newTestTransportManager(t, &pldconf.TransportManagerConfig{ + _, dm, _, done := newTestTransportManager(t, false, &pldconf.TransportManagerConfig{ NodeName: "node1", Transports: map[string]*pldconf.TransportConfig{}, }) @@ -126,7 +145,7 @@ func TestTransportRegisteredNotFound(t *testing.T) { } func TestConfigureTransportFail(t *testing.T) { - _, tm, _, done := newTestTransportManager(t, &pldconf.TransportManagerConfig{ + _, tm, _, done := newTestTransportManager(t, false, &pldconf.TransportManagerConfig{ NodeName: "node1", Transports: map[string]*pldconf.TransportConfig{ "test1": { @@ -151,7 +170,7 @@ func TestDoubleRegisterClient(t *testing.T) { tm := NewTransportManager(context.Background(), &pldconf.TransportManagerConfig{}) receivingClient := componentmocks.NewTransportClient(t) - receivingClient.On("Destination").Return("receivingClient1") + receivingClient.On("Destination").Return(prototk.PaladinMsg_TRANSACTION_ENGINE) err := tm.RegisterClient(context.Background(), receivingClient) require.NoError(t, err) @@ -165,7 +184,7 @@ func TestDoubleRegisterAfterStart(t *testing.T) { tm.(*transportManager).destinationsFixed = true receivingClient := componentmocks.NewTransportClient(t) - receivingClient.On("Destination").Return("receivingClient1") + receivingClient.On("Destination").Return(prototk.PaladinMsg_TRANSACTION_ENGINE) err := tm.RegisterClient(context.Background(), receivingClient) assert.Regexp(t, "PD012012", err) @@ -179,7 +198,7 @@ func TestGetLocalTransportDetailsNotFound(t *testing.T) { } func TestGetLocalTransportDetailsNotFail(t *testing.T) { - ctx, tm, tp, done := newTestTransport(t) + ctx, tm, tp, done := newTestTransport(t, false) defer done() tp.Functions.GetLocalDetails = func(ctx context.Context, gldr *prototk.GetLocalDetailsRequest) (*prototk.GetLocalDetailsResponse, error) { diff --git a/core/go/internal/transportmgr/peer.go b/core/go/internal/transportmgr/peer.go index 4055e72a5..7e8100956 100644 --- 
a/core/go/internal/transportmgr/peer.go +++ b/core/go/internal/transportmgr/peer.go @@ -91,7 +91,7 @@ func (tm *transportManager) getPeer(ctx context.Context, nodeName string) (*peer p = tm.peers[nodeName] if p != nil { // There was a race to connect to this peer, and the other routine won - log.L(ctx).Debugf("connection already active for peer '%s' (aft4er connection race)", nodeName) + log.L(ctx).Debugf("connection already active for peer '%s' (after connection race)", nodeName) return p, nil } @@ -140,15 +140,17 @@ func (tm *transportManager) getPeer(ctx context.Context, nodeName string) (*peer NodeName: nodeName, TransportDetails: remoteTransportDetails, }) - if err == nil { - err = json.Unmarshal([]byte(res.PeerInfoJson), &p.peerInfo) - } if err != nil { return nil, err } + if err = json.Unmarshal([]byte(res.PeerInfoJson), &p.peerInfo); err != nil { + log.L(ctx).Errorf("Invalid peerInfo: %s", p.peerInfo) + return nil, i18n.NewError(ctx, msgs.MsgTransportInvalidPeerInfo) + } log.L(ctx).Debugf("connected to peer '%s'", nodeName) tm.peers[nodeName] = p + go p.sender() return p, nil } @@ -159,7 +161,7 @@ func (p *peer) notifyPersistedMsgAvailable() { } } -func (p *peer) stateDistributionMsg(rm *components.ReliableMessage, targetNode string, sd *components.StateDistributionWithData) *prototk.PaladinMsg { +func (p *peer) stateDistributionMsg(rm *components.ReliableMessage, sd *components.StateDistributionWithData) *prototk.PaladinMsg { payload, _ := json.Marshal(sd) return &prototk.PaladinMsg{ MessageId: rm.ID.String(), @@ -358,6 +360,11 @@ func (p *peer) sender() { if err != nil { return // context closed } + + // TODO: + // - Send fire & forget messages + // - Unregister selves on stop + // - Stop automatically on idle timeout } } diff --git a/core/go/internal/transportmgr/transport_test.go b/core/go/internal/transportmgr/transport_test.go index 367b02810..ad614c19d 100644 --- a/core/go/internal/transportmgr/transport_test.go +++ b/core/go/internal/transportmgr/transport_test.go @@ -51,9 +51,9 @@ func newTestPlugin(transportFuncs *plugintk.TransportAPIFunctions) *testPlugin { } } -func newTestTransport(t *testing.T, extraSetup ...func(mc *mockComponents) components.TransportClient) (context.Context, *transportManager, *testPlugin, func()) { +func newTestTransport(t *testing.T, realDB bool, extraSetup ...func(mc *mockComponents) components.TransportClient) (context.Context, *transportManager, *testPlugin, func()) { - ctx, tm, _, done := newTestTransportManager(t, &pldconf.TransportManagerConfig{ + ctx, tm, _, done := newTestTransportManager(t, realDB, &pldconf.TransportManagerConfig{ NodeName: "node1", Transports: map[string]*pldconf.TransportConfig{ "test1": { @@ -89,7 +89,7 @@ func registerTestTransport(t *testing.T, tm *transportManager, tp *testPlugin) { func TestDoubleRegisterReplaces(t *testing.T) { - _, rm, tp0, done := newTestTransport(t) + _, rm, tp0, done := newTestTransport(t, false) defer done() assert.Nil(t, tp0.t.initError.Load()) assert.True(t, tp0.initialized.Load()) @@ -111,6 +111,7 @@ func TestDoubleRegisterReplaces(t *testing.T) { func testMessage() *components.FireAndForgetMessageSend { return &components.FireAndForgetMessageSend{ + Node: "node2", CorrelationID: confutil.P(uuid.New()), MessageType: "myMessageType", Payload: []byte("something"), @@ -118,7 +119,7 @@ func testMessage() *components.FireAndForgetMessageSend { } func TestSendMessage(t *testing.T) { - ctx, tm, tp, done := newTestTransport(t, func(mc *mockComponents) components.TransportClient { + ctx, tm, tp, 
done := newTestTransport(t, false, func(mc *mockComponents) components.TransportClient { mc.registryManager.On("GetNodeTransports", mock.Anything, "node2").Return([]*components.RegistryNodeTransportEntry{ { Node: "node2", @@ -133,6 +134,12 @@ func TestSendMessage(t *testing.T) { message := testMessage() sentMessages := make(chan *prototk.PaladinMsg, 1) + tp.Functions.ActivateNode = func(ctx context.Context, anr *prototk.ActivateNodeRequest) (*prototk.ActivateNodeResponse, error) { + return &prototk.ActivateNodeResponse{PeerInfoJson: `{"endpoint":"some.url"}`}, nil + } + tp.Functions.DeactivateNode = func(ctx context.Context, dnr *prototk.DeactivateNodeRequest) (*prototk.DeactivateNodeResponse, error) { + return &prototk.DeactivateNodeResponse{}, nil + } tp.Functions.SendMessage = func(ctx context.Context, req *prototk.SendMessageRequest) (*prototk.SendMessageResponse, error) { sent := req.Message assert.NotEmpty(t, sent.MessageId) @@ -157,7 +164,7 @@ func TestSendMessage(t *testing.T) { } func TestSendMessageNotInit(t *testing.T) { - ctx, tm, tp, done := newTestTransport(t, func(mc *mockComponents) components.TransportClient { + ctx, tm, tp, done := newTestTransport(t, false, func(mc *mockComponents) components.TransportClient { mc.registryManager.On("GetNodeTransports", mock.Anything, "node2").Return([]*components.RegistryNodeTransportEntry{ { Node: "node1", @@ -179,7 +186,7 @@ func TestSendMessageNotInit(t *testing.T) { } func TestSendMessageFail(t *testing.T) { - ctx, tm, tp, done := newTestTransport(t, func(mc *mockComponents) components.TransportClient { + ctx, tm, tp, done := newTestTransport(t, false, func(mc *mockComponents) components.TransportClient { mc.registryManager.On("GetNodeTransports", mock.Anything, "node2").Return([]*components.RegistryNodeTransportEntry{ { Node: "node1", @@ -203,7 +210,7 @@ func TestSendMessageFail(t *testing.T) { } func TestSendMessageDestNotFound(t *testing.T) { - ctx, tm, _, done := newTestTransport(t, func(mc *mockComponents) components.TransportClient { + ctx, tm, _, done := newTestTransport(t, false, func(mc *mockComponents) components.TransportClient { mc.registryManager.On("GetNodeTransports", mock.Anything, "node2").Return(nil, fmt.Errorf("not found")) return nil }) @@ -217,7 +224,7 @@ func TestSendMessageDestNotFound(t *testing.T) { } func TestSendMessageDestNotAvailable(t *testing.T) { - ctx, tm, tp, done := newTestTransport(t, func(mc *mockComponents) components.TransportClient { + ctx, tm, tp, done := newTestTransport(t, false, func(mc *mockComponents) components.TransportClient { mc.registryManager.On("GetNodeTransports", mock.Anything, "node2").Return([]*components.RegistryNodeTransportEntry{ { Node: "node1", @@ -247,7 +254,7 @@ func TestSendMessageDestNotAvailable(t *testing.T) { } func TestSendMessageDestWrong(t *testing.T) { - ctx, tm, _, done := newTestTransport(t) + ctx, tm, _, done := newTestTransport(t, false) defer done() message := testMessage() @@ -265,7 +272,7 @@ func TestSendMessageDestWrong(t *testing.T) { } func TestSendInvalidMessageNoPayload(t *testing.T) { - ctx, tm, _, done := newTestTransport(t) + ctx, tm, _, done := newTestTransport(t, false) defer done() message := &components.FireAndForgetMessageSend{} @@ -277,7 +284,7 @@ func TestSendInvalidMessageNoPayload(t *testing.T) { func TestReceiveMessage(t *testing.T) { receivedMessages := make(chan *prototk.PaladinMsg, 1) - ctx, _, tp, done := newTestTransport(t, func(mc *mockComponents) components.TransportClient { + ctx, _, tp, done := newTestTransport(t, false, func(mc 
*mockComponents) components.TransportClient { receivingClient := componentmocks.NewTransportClient(t) receivingClient.On("Destination").Return("receivingClient1") receivingClient.On("HandlePaladinMsg", mock.Anything, mock.Anything).Return().Run(func(args mock.Arguments) { @@ -290,9 +297,7 @@ func TestReceiveMessage(t *testing.T) { msg := &prototk.PaladinMsg{ MessageId: uuid.NewString(), CorrelationId: confutil.P(uuid.NewString()), - Node: "node1", - Component: "receivingClient1", - ReplyTo: "node2", + Component: prototk.PaladinMsg_TRANSACTION_ENGINE, MessageType: "myMessageType", Payload: []byte("some data"), } @@ -307,15 +312,13 @@ func TestReceiveMessage(t *testing.T) { } func TestReceiveMessageNoReceiver(t *testing.T) { - ctx, _, tp, done := newTestTransport(t) + ctx, _, tp, done := newTestTransport(t, false) defer done() msg := &prototk.PaladinMsg{ MessageId: uuid.NewString(), CorrelationId: confutil.P(uuid.NewString()), - Node: "node1", - Component: "receivingClient1", - ReplyTo: "node2", + Component: prototk.PaladinMsg_TRANSACTION_ENGINE, MessageType: "myMessageType", Payload: []byte("some data"), } @@ -327,15 +330,13 @@ func TestReceiveMessageNoReceiver(t *testing.T) { } func TestReceiveMessageInvalidDestination(t *testing.T) { - ctx, _, tp, done := newTestTransport(t) + ctx, _, tp, done := newTestTransport(t, false) defer done() msg := &prototk.PaladinMsg{ MessageId: uuid.NewString(), CorrelationId: confutil.P(uuid.NewString()), - Component: "___", - Node: "node1", - ReplyTo: "node2", + Component: prototk.PaladinMsg_Component(42), MessageType: "myMessageType", Payload: []byte("some data"), } @@ -347,7 +348,7 @@ func TestReceiveMessageInvalidDestination(t *testing.T) { } func TestReceiveMessageNotInit(t *testing.T) { - ctx, _, tp, done := newTestTransport(t) + ctx, _, tp, done := newTestTransport(t, false) defer done() tp.t.initialized.Store(false) @@ -355,9 +356,7 @@ func TestReceiveMessageNotInit(t *testing.T) { msg := &prototk.PaladinMsg{ MessageId: uuid.NewString(), CorrelationId: confutil.P(uuid.NewString()), - Component: "to", - Node: "node1", - ReplyTo: "node2", + Component: prototk.PaladinMsg_TRANSACTION_ENGINE, MessageType: "myMessageType", Payload: []byte("some data"), } @@ -368,7 +367,7 @@ func TestReceiveMessageNotInit(t *testing.T) { } func TestReceiveMessageNoPayload(t *testing.T) { - ctx, _, tp, done := newTestTransport(t) + ctx, _, tp, done := newTestTransport(t, false) defer done() msg := &prototk.PaladinMsg{} @@ -379,13 +378,11 @@ func TestReceiveMessageNoPayload(t *testing.T) { } func TestReceiveMessageWrongNode(t *testing.T) { - ctx, _, tp, done := newTestTransport(t) + ctx, _, tp, done := newTestTransport(t, false) defer done() msg := &prototk.PaladinMsg{ - Component: "to", - Node: "node2", - ReplyTo: "node2", + Component: prototk.PaladinMsg_TRANSACTION_ENGINE, MessageType: "myMessageType", Payload: []byte("some data"), } @@ -396,14 +393,12 @@ func TestReceiveMessageWrongNode(t *testing.T) { } func TestReceiveMessageBadDestination(t *testing.T) { - ctx, _, tp, done := newTestTransport(t) + ctx, _, tp, done := newTestTransport(t, false) defer done() msg := &prototk.PaladinMsg{ MessageId: uuid.NewString(), - Component: "to", - Node: "node2", - ReplyTo: "node1", + Component: prototk.PaladinMsg_TRANSACTION_ENGINE, MessageType: "myMessageType", Payload: []byte("some data"), } @@ -414,13 +409,11 @@ func TestReceiveMessageBadDestination(t *testing.T) { } func TestReceiveMessageBadMsgID(t *testing.T) { - ctx, _, tp, done := newTestTransport(t) + ctx, _, tp, done := 
newTestTransport(t, false) defer done() msg := &prototk.PaladinMsg{ - Component: "to", - Node: "node1", - ReplyTo: "node2", + Component: prototk.PaladinMsg_TRANSACTION_ENGINE, MessageType: "myMessageType", Payload: []byte("some data"), } @@ -431,15 +424,13 @@ func TestReceiveMessageBadMsgID(t *testing.T) { } func TestReceiveMessageBadCorrelID(t *testing.T) { - ctx, _, tp, done := newTestTransport(t) + ctx, _, tp, done := newTestTransport(t, false) defer done() msg := &prototk.PaladinMsg{ MessageId: uuid.NewString(), CorrelationId: confutil.P("wrong"), - Component: "to", - Node: "node1", - ReplyTo: "node2", + Component: prototk.PaladinMsg_TRANSACTION_ENGINE, MessageType: "myMessageType", Payload: []byte("some data"), } diff --git a/core/go/internal/transportmgr/transportmgr_rpc_test.go b/core/go/internal/transportmgr/transportmgr_rpc_test.go index b4fe6e847..1feb7a158 100644 --- a/core/go/internal/transportmgr/transportmgr_rpc_test.go +++ b/core/go/internal/transportmgr/transportmgr_rpc_test.go @@ -31,7 +31,7 @@ import ( ) func TestRPCLocalDetails(t *testing.T) { - ctx, tm, tp, done := newTestTransport(t) + ctx, tm, tp, done := newTestTransport(t, false) defer done() rpc, rpcDone := newTestRPCServer(t, ctx, tm) diff --git a/toolkit/proto/protos/from_transport.proto b/toolkit/proto/protos/from_transport.proto index a8828a3a2..3c5f9cd36 100644 --- a/toolkit/proto/protos/from_transport.proto +++ b/toolkit/proto/protos/from_transport.proto @@ -20,7 +20,8 @@ package io.kaleido.paladin.toolkit; import "to_transport.proto"; message ReceiveMessageRequest { - PaladinMsg message = 1; + string from_node = 1; + PaladinMsg message = 2; } message ReceiveMessageResponse { diff --git a/transports/grpc/internal/grpctransport/grpc_transport.go b/transports/grpc/internal/grpctransport/grpc_transport.go index 4c4b1c93e..5e358e6c9 100644 --- a/transports/grpc/internal/grpctransport/grpc_transport.go +++ b/transports/grpc/internal/grpctransport/grpc_transport.go @@ -190,6 +190,7 @@ func (t *grpcTransport) ConnectSendStream(stream grpc.ClientStreamingServer[prot // Deliver it to Paladin _, err = t.callbacks.ReceiveMessage(ctx, &prototk.ReceiveMessageRequest{ + FromNode: ai.verifiedNodeName, Message: &prototk.PaladinMsg{ MessageId: msg.MessageId, CorrelationId: msg.CorrelationId, diff --git a/transports/grpc/internal/grpctransport/grpc_transport_test.go b/transports/grpc/internal/grpctransport/grpc_transport_test.go index 3fed123ba..ce949b7d0 100644 --- a/transports/grpc/internal/grpctransport/grpc_transport_test.go +++ b/transports/grpc/internal/grpctransport/grpc_transport_test.go @@ -150,6 +150,7 @@ func TestConnectFail(t *testing.T) { plugin1, plugin2, done := newSuccessfulVerifiedConnection(t, func(_, callbacks2 *testCallbacks) { callbacks2.receiveMessage = func(ctx context.Context, rmr *prototk.ReceiveMessageRequest) (*prototk.ReceiveMessageResponse, error) { + require.Equal(t, "node1", rmr.FromNode) return &prototk.ReceiveMessageResponse{}, nil } }) From 1169abc1f126470afa7decee7a7647f58cf11b30 Mon Sep 17 00:00:00 2001 From: Peter Broadhurst Date: Mon, 30 Dec 2024 10:28:19 -0500 Subject: [PATCH 08/41] Work through send loop logic Signed-off-by: Peter Broadhurst --- config/pkg/pldconf/transportmgr.go | 2 + core/go/internal/transportmgr/manager.go | 26 ++++--- core/go/internal/transportmgr/peer.go | 87 +++++++++++++++++++++--- 3 files changed, 98 insertions(+), 17 deletions(-) diff --git a/config/pkg/pldconf/transportmgr.go b/config/pkg/pldconf/transportmgr.go index d1666d7cc..bf8b6c8d3 100644 --- 
a/config/pkg/pldconf/transportmgr.go +++ b/config/pkg/pldconf/transportmgr.go @@ -19,6 +19,7 @@ import "github.com/kaleido-io/paladin/config/pkg/confutil" type TransportManagerConfig struct { NodeName string `json:"nodeName"` SendQueueLen *int `json:"sendQueueLen"` + PeerInactivityTimeout *string `json:"peerInactivityTimeout"` SendRetry RetryConfigWithMax `json:"sendRetry"` ReliableScanRetry RetryConfig `json:"reliableScanRetry"` ReliableMessageResend *string `json:"reliableMessageResend"` @@ -32,6 +33,7 @@ type TransportInitConfig struct { var TransportManagerDefaults = &TransportManagerConfig{ SendQueueLen: confutil.P(10), ReliableMessageResend: confutil.P("30s"), + PeerInactivityTimeout: confutil.P("1m"), ReliableScanRetry: GenericRetryDefaults.RetryConfig, // SendRetry defaults are deliberately short SendRetry: RetryConfigWithMax{ diff --git a/core/go/internal/transportmgr/manager.go b/core/go/internal/transportmgr/manager.go index 060c6730d..c204fa350 100644 --- a/core/go/internal/transportmgr/manager.go +++ b/core/go/internal/transportmgr/manager.go @@ -58,8 +58,10 @@ type transportManager struct { peersLock sync.RWMutex peers map[string]*peer - sendShortRetry *retry.Retry - reliableScanRetry *retry.Retry + sendShortRetry *retry.Retry + reliableScanRetry *retry.Retry + peerInactivityTimeout time.Duration + quiesceTimeout time.Duration senderBufferLen int reliableMessageResend time.Duration @@ -78,6 +80,8 @@ func NewTransportManager(bgCtx context.Context, conf *pldconf.TransportManagerCo reliableMessageResend: confutil.DurationMin(conf.ReliableMessageResend, 100*time.Millisecond, *pldconf.TransportManagerDefaults.ReliableMessageResend), sendShortRetry: retry.NewRetryLimited(&conf.SendRetry, &pldconf.TransportManagerDefaults.SendRetry), reliableScanRetry: retry.NewRetryIndefinite(&conf.ReliableScanRetry, &pldconf.TransportManagerDefaults.ReliableScanRetry), + peerInactivityTimeout: confutil.DurationMin(conf.PeerInactivityTimeout, 0, *pldconf.TransportManagerDefaults.PeerInactivityTimeout), + quiesceTimeout: 1 * time.Second, // not currently tunable (considered very small edge case) } } @@ -225,12 +229,6 @@ func (tm *transportManager) Send(ctx context.Context, send *components.FireAndFo return i18n.NewError(ctx, msgs.MsgTransportInvalidMessage) } - // Use or establish a peer connection for the send - peer, err := tm.getPeer(ctx, send.Node) - if err != nil { - return err - } - msg := &prototk.PaladinMsg{ MessageId: uuid.NewString(), MessageType: send.MessageType, @@ -242,13 +240,23 @@ func (tm *transportManager) Send(ctx context.Context, send *components.FireAndFo msg.CorrelationId = &cidStr } + return tm.queueFireAndForget(ctx, send.Node, msg) +} + +func (tm *transportManager) queueFireAndForget(ctx context.Context, nodeName string, msg *prototk.PaladinMsg) error { + // Use or establish a peer connection for the send + peer, err := tm.getPeer(ctx, nodeName) + if err != nil { + return err + } + // Push the send to the peer - this is a best effort interaction. // There is some retry in the Paladin layer, and some transports provide resilience. // However, the send is at-most-once, and the higher level message protocols that // use this "send" must be fault tolerant to message loss. 
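// A caller-side sketch of that contract (hypothetical caller, not part of
// this patch): a nil error from Send means "queued towards the peer", not
// "delivered", so callers that need delivery guarantees use the persisted
// reliable message path (reliable_msgs, rescanned by the peer sender loop)
// rather than this one.
//
//   err := tm.Send(ctx, &components.FireAndForgetMessageSend{
//       Node:        "node2",
//       Component:   prototk.PaladinMsg_TRANSACTION_ENGINE,
//       MessageType: "myMessageType",
//       Payload:     []byte(`{"some":"data"}`),
//   })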
select { case peer.sendQueue <- msg: - log.L(ctx).Debugf("queued %s message %s (cid=%v) to %s", msg.MessageType, msg.MessageId, send.CorrelationID, peer.name) + log.L(ctx).Debugf("queued %s message %s (cid=%v) to %s", msg.MessageType, msg.MessageId, tktypes.StrOrEmpty(msg.CorrelationId), peer.name) return nil case <-ctx.Done(): return i18n.NewError(ctx, msgs.MsgContextCanceled) diff --git a/core/go/internal/transportmgr/peer.go b/core/go/internal/transportmgr/peer.go index 7e8100956..676daa3a5 100644 --- a/core/go/internal/transportmgr/peer.go +++ b/core/go/internal/transportmgr/peer.go @@ -44,10 +44,12 @@ type peer struct { sendQueue chan *prototk.PaladinMsg // Send loop state (no lock as only used on the loop) - lastFullScan time.Time - lastDrainHWM *tktypes.Timestamp + lastFullScan time.Time + lastDrainHWM *tktypes.Timestamp + persistentMsgsDrained bool - done chan struct{} + quiescing bool + done chan struct{} } type nameSortedPeers []*peer @@ -178,13 +180,40 @@ func (p *peer) send(msg *prototk.PaladinMsg) error { } func (p *peer) senderDone() { + p.deactivate() + + // There's a very small window where we might have got delivered a message by a routine + // that got us out of the map before we deactivated. + // In this edge case, we need to spin off the new peer connection to replace us. + for p.quiescing { + select { + case msg := <-p.sendQueue: + log.L(p.ctx).Infof("message delivered in inactivity quiesce window. Re-connecting") + _ = p.tm.queueFireAndForget(p.ctx, p.name, msg) + case <-p.persistedMsgsAvailable: + log.L(p.ctx).Infof("reliable message delivered in inactivity quiesce window. Re-connecting") + _, _ = p.tm.getPeer(p.ctx, p.name) + case <-time.After(p.tm.quiesceTimeout): + p.quiescing = false + } + } + + close(p.done) +} + +func (p *peer) deactivate() { + // Hold the peers write lock to do this + p.tm.peersLock.Lock() + defer p.tm.peersLock.Unlock() + delete(p.tm.peers, p.name) + + // Holding the lock while activating/deactivating ensures we never dual-activate in the transport log.L(p.ctx).Infof("peer %s deactivating", p.name) if _, err := p.transport.api.DeactivateNode(p.ctx, &prototk.DeactivateNodeRequest{ NodeName: p.name, }); err != nil { log.L(p.ctx).Warnf("peer %s returned deactivation error: %s", p.name, err) } - close(p.done) } func (p *peer) reliableMessageScan() error { @@ -232,6 +261,7 @@ func (p *peer) reliableMessageScan() error { } if len(page) > 0 { + p.persistentMsgsDrained = false // we know there's some messages total += len(page) lastPageEnd = &page[len(page)-1].Created } @@ -253,6 +283,9 @@ func (p *peer) reliableMessageScan() error { // Record the last full scan if fullScan { + // We only know we're empty when we do a full re-scan, and that comes back empty + p.persistentMsgsDrained = (total == 0) + p.lastFullScan = time.Now() } @@ -296,6 +329,15 @@ func (p *peer) processReliableMsgPage(page []*components.ReliableMessage) (err e msgsToSend := make([]*prototk.PaladinMsg, 0, len(page)) var errorAcks []*components.ReliableMessageAck for _, rm := range page { + + // Check it's either after our HWM, or eligible for re-send + afterHWM := p.lastDrainHWM == nil || *p.lastDrainHWM < rm.Created + if !afterHWM && time.Since(rm.Created.Time()) < p.tm.reliableMessageResend { + log.L(p.ctx).Infof("Unacknowledged message %s not yet eligible for re-send", rm.ID) + continue + } + + // Process it var msg *prototk.PaladinMsg var errorAck error switch rm.MessageType.V() { @@ -351,6 +393,7 @@ func (p *peer) sender() { log.L(p.ctx).Infof("peer %s active", p.name) + 
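The senderDone/deactivate pair above closes a small race: a producer can pick the peer out of the map just before deactivate deletes it. Stripped of the real types, the quiesce pattern looks like this (standalone sketch with illustrative names):

// After removing itself from the shared registry, the loop keeps draining
// for a short grace period and re-dispatches anything that raced in, so a
// late delivery is never silently dropped.
func drainAfterDeregister(queue <-chan string, redispatch func(string), grace time.Duration) {
	quiescing := true
	for quiescing {
		select {
		case m := <-queue:
			redispatch(m) // a producer won the race; hand the work to a fresh instance
		case <-time.After(grace):
			quiescing = false // window passed with no stragglers
		}
	}
}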
hitInactivityTimeout := false for { // We send/resend any reliable messages queued up first @@ -361,10 +404,38 @@ func (p *peer) sender() { return // context closed } - // TODO: - // - Send fire & forget messages - // - Unregister selves on stop - // - Stop automatically on idle timeout + // Depending on our persistent message status, check if we're able to quiesce + if hitInactivityTimeout && p.persistentMsgsDrained { + p.quiescing = true + return // quiesce handling is in senderDone() deferred function + } + hitInactivityTimeout = false + + // Our wait timeout needs to be the shortest of: + // - The full re-scan timeout for reliable messages + // - The inactivity timeout + inactivityTimeout := p.tm.reliableMessageResend + if inactivityTimeout > p.tm.peerInactivityTimeout { + inactivityTimeout = p.tm.peerInactivityTimeout + } + inactivityTimer := time.NewTimer(inactivityTimeout) + processingMsgs := true + for processingMsgs { + select { + case <-inactivityTimer.C: + hitInactivityTimeout = true + processingMsgs = false // spin round and check if we have persisted messages to (re)process + case <-p.persistedMsgsAvailable: + processingMsgs = false // spin round and get the messages + case <-p.ctx.Done(): + return // we're done + case msg := <-p.sendQueue: + // send and spin straight round + if err := p.send(msg); err != nil { + log.L(p.ctx).Errorf("failed to send message '%s' after short retry (discarding): %s", msg.MessageId, err) + } + } + } } } From 582b72fa20fd33c968257bfcfece88403fcdf04c Mon Sep 17 00:00:00 2001 From: Peter Broadhurst Date: Tue, 31 Dec 2024 14:27:06 -0500 Subject: [PATCH 09/41] Fix gorm annotations Signed-off-by: Peter Broadhurst --- core/go/internal/components/transportmgr.go | 6 +- core/go/internal/transportmgr/manager.go | 17 +++- core/go/internal/transportmgr/transport.go | 3 - .../internal/transportmgr/transport_test.go | 94 ++++++++++++------- 4 files changed, 74 insertions(+), 46 deletions(-) diff --git a/core/go/internal/components/transportmgr.go b/core/go/internal/components/transportmgr.go index f0485c2f9..152cf71c7 100644 --- a/core/go/internal/components/transportmgr.go +++ b/core/go/internal/components/transportmgr.go @@ -53,10 +53,10 @@ type ReliableMessage struct { ReplyTo string `json:"replyTo" gorm:"column:reply_to"` // The identity to respond to on the sending node MessageType tktypes.Enum[ReliableMessageType] `json:"messageType" gorm:"column:msg_type"` Metadata tktypes.RawJSON `json:"metadata" gorm:"column:metadata"` - Ack *ReliableMessageAck `json:"ack,omitempty" gorm:"foreignKey:id;references:id;"` + Ack *ReliableMessageAck `json:"ack,omitempty" gorm:"foreignKey:MessageID;references:ID;"` } -func (rm ReliableMessage) Table() string { +func (rm ReliableMessage) TableName() string { return "reliable_msgs" } @@ -66,7 +66,7 @@ type ReliableMessageAck struct { Error string `json:"error,omitempty" gorm:"column:error,autoCreateTime:false"` } -func (rma ReliableMessageAck) Table() string { +func (rma ReliableMessageAck) TableName() string { return "reliable_msg_acks" } diff --git a/core/go/internal/transportmgr/manager.go b/core/go/internal/transportmgr/manager.go index c204fa350..812c0ab5d 100644 --- a/core/go/internal/transportmgr/manager.go +++ b/core/go/internal/transportmgr/manager.go @@ -114,6 +114,12 @@ func (tm *transportManager) Start() error { } func (tm *transportManager) Stop() { + + peers := tm.listActivePeers() + for _, p := range peers { + p.close() + } + tm.mux.Lock() var allTransports []*transport for _, t := range tm.transportsByID { @@ 
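The wait at the bottom of the sender loop above uses a single timer for two purposes, firing at the shorter of the reliable re-scan interval and the peer inactivity timeout. In isolation it reduces to this (illustrative names; the real loop also selects on the send queue and the peer context):

// Returns true if the wait ended by timeout rather than by a work notification.
func waitForWork(resendInterval, inactivityTimeout time.Duration, wake <-chan struct{}) bool {
	wait := resendInterval
	if wait > inactivityTimeout {
		wait = inactivityTimeout
	}
	timer := time.NewTimer(wait)
	defer timer.Stop()
	select {
	case <-timer.C:
		return true // re-scan persisted messages and/or consider quiescing
	case <-wake:
		return false // new persisted work was signalled; scan immediately
	}
}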
-244,8 +250,11 @@ func (tm *transportManager) Send(ctx context.Context, send *components.FireAndFo } func (tm *transportManager) queueFireAndForget(ctx context.Context, nodeName string, msg *prototk.PaladinMsg) error { - // Use or establish a peer connection for the send - peer, err := tm.getPeer(ctx, nodeName) + // Use or establish a p connection for the send + p, err := tm.getPeer(ctx, nodeName) + if err == nil { + err = p.transport.checkInit(ctx) + } if err != nil { return err } @@ -255,8 +264,8 @@ func (tm *transportManager) queueFireAndForget(ctx context.Context, nodeName str // However, the send is at-most-once, and the higher level message protocols that // use this "send" must be fault tolerant to message loss. select { - case peer.sendQueue <- msg: - log.L(ctx).Debugf("queued %s message %s (cid=%v) to %s", msg.MessageType, msg.MessageId, tktypes.StrOrEmpty(msg.CorrelationId), peer.name) + case p.sendQueue <- msg: + log.L(ctx).Debugf("queued %s message %s (cid=%v) to %s", msg.MessageType, msg.MessageId, tktypes.StrOrEmpty(msg.CorrelationId), p.name) return nil case <-ctx.Done(): return i18n.NewError(ctx, msgs.MsgContextCanceled) diff --git a/core/go/internal/transportmgr/transport.go b/core/go/internal/transportmgr/transport.go index 3bfbee476..01b6f3353 100644 --- a/core/go/internal/transportmgr/transport.go +++ b/core/go/internal/transportmgr/transport.go @@ -98,9 +98,6 @@ func (t *transport) checkInit(ctx context.Context) error { } func (t *transport) send(ctx context.Context, nodeName string, msg *prototk.PaladinMsg) error { - if err := t.checkInit(ctx); err != nil { - return err - } _, err := t.api.SendMessage(ctx, &prototk.SendMessageRequest{ Node: nodeName, diff --git a/core/go/internal/transportmgr/transport_test.go b/core/go/internal/transportmgr/transport_test.go index ad614c19d..eac7e2fea 100644 --- a/core/go/internal/transportmgr/transport_test.go +++ b/core/go/internal/transportmgr/transport_test.go @@ -21,6 +21,7 @@ import ( "sync/atomic" "testing" + "github.com/DATA-DOG/go-sqlmock" "github.com/google/uuid" "github.com/kaleido-io/paladin/config/pkg/confutil" "github.com/kaleido-io/paladin/config/pkg/pldconf" @@ -118,28 +119,40 @@ func testMessage() *components.FireAndForgetMessageSend { } } -func TestSendMessage(t *testing.T) { - ctx, tm, tp, done := newTestTransport(t, false, func(mc *mockComponents) components.TransportClient { - mc.registryManager.On("GetNodeTransports", mock.Anything, "node2").Return([]*components.RegistryNodeTransportEntry{ - { - Node: "node2", - Transport: "test1", - Details: `{"likely":"json stuff"}`, - }, - }, nil) - return nil - }) - defer done() - - message := testMessage() +func mockEmptyReliableMsgs(mc *mockComponents) components.TransportClient { + mc.db.Mock.ExpectQuery("SELECT.*reliable_msgs").WillReturnRows(sqlmock.NewRows([]string{})) + mc.db.Mock.MatchExpectationsInOrder(false) + return nil +} - sentMessages := make(chan *prototk.PaladinMsg, 1) +func mockActivateDeactivateOk(tp *testPlugin) { tp.Functions.ActivateNode = func(ctx context.Context, anr *prototk.ActivateNodeRequest) (*prototk.ActivateNodeResponse, error) { return &prototk.ActivateNodeResponse{PeerInfoJson: `{"endpoint":"some.url"}`}, nil } tp.Functions.DeactivateNode = func(ctx context.Context, dnr *prototk.DeactivateNodeRequest) (*prototk.DeactivateNodeResponse, error) { return &prototk.DeactivateNodeResponse{}, nil } +} + +func TestSendMessage(t *testing.T) { + ctx, tm, tp, done := newTestTransport(t, false, + mockEmptyReliableMsgs, + func(mc *mockComponents) 
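For callers, the contract of Send is unchanged by this refactor: build a FireAndForgetMessageSend and treat delivery as at-most-once, with loss recovery belonging to a higher-level protocol. A usage sketch with arbitrary field values:

err := tm.Send(ctx, &components.FireAndForgetMessageSend{
	Node:        "node2",
	Component:   prototk.PaladinMsg_TRANSACTION_ENGINE,
	MessageType: "example_request",
	Payload:     []byte(`{"hello":"world"}`),
})
if err != nil {
	// Queueing failed (invalid target node, cancelled context, ...). A nil
	// return still only means "queued"; delivery itself is never confirmed here.
}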
components.TransportClient { + mc.registryManager.On("GetNodeTransports", mock.Anything, "node2").Return([]*components.RegistryNodeTransportEntry{ + { + Node: "node2", + Transport: "test1", + Details: `{"likely":"json stuff"}`, + }, + }, nil) + return nil + }) + defer done() + + message := testMessage() + + sentMessages := make(chan *prototk.PaladinMsg, 1) + mockActivateDeactivateOk(tp) tp.Functions.SendMessage = func(ctx context.Context, req *prototk.SendMessageRequest) (*prototk.SendMessageResponse, error) { sent := req.Message assert.NotEmpty(t, sent.MessageId) @@ -164,48 +177,57 @@ func TestSendMessage(t *testing.T) { } func TestSendMessageNotInit(t *testing.T) { - ctx, tm, tp, done := newTestTransport(t, false, func(mc *mockComponents) components.TransportClient { - mc.registryManager.On("GetNodeTransports", mock.Anything, "node2").Return([]*components.RegistryNodeTransportEntry{ - { - Node: "node1", - Transport: "test1", - Details: `{"likely":"json stuff"}`, - }, - }, nil) - return nil - }) + ctx, tm, tp, done := newTestTransport(t, false, + mockEmptyReliableMsgs, + func(mc *mockComponents) components.TransportClient { + mc.registryManager.On("GetNodeTransports", mock.Anything, "node2").Return([]*components.RegistryNodeTransportEntry{ + { + Node: "node1", + Transport: "test1", + Details: `{"likely":"json stuff"}`, + }, + }, nil) + return nil + }) defer done() tp.t.initialized.Store(false) message := testMessage() + mockActivateDeactivateOk(tp) err := tm.Send(ctx, message) assert.Regexp(t, "PD011601", err) } func TestSendMessageFail(t *testing.T) { - ctx, tm, tp, done := newTestTransport(t, false, func(mc *mockComponents) components.TransportClient { - mc.registryManager.On("GetNodeTransports", mock.Anything, "node2").Return([]*components.RegistryNodeTransportEntry{ - { - Node: "node1", - Transport: "test1", - Details: `{"likely":"json stuff"}`, - }, - }, nil) - return nil - }) + ctx, tm, tp, done := newTestTransport(t, false, + mockEmptyReliableMsgs, + func(mc *mockComponents) components.TransportClient { + mc.registryManager.On("GetNodeTransports", mock.Anything, "node2").Return([]*components.RegistryNodeTransportEntry{ + { + Node: "node1", + Transport: "test1", + Details: `{"likely":"json stuff"}`, + }, + }, nil) + return nil + }) defer done() + sent := make(chan struct{}) tp.Functions.SendMessage = func(ctx context.Context, req *prototk.SendMessageRequest) (*prototk.SendMessageResponse, error) { + close(sent) return nil, fmt.Errorf("pop") } message := testMessage() + mockActivateDeactivateOk(tp) err := tm.Send(ctx, message) - assert.Regexp(t, "pop", err) + assert.NoError(t, err) + <-sent } From 3342176e7afe454abce5949971a98bd2a75637df Mon Sep 17 00:00:00 2001 From: Peter Broadhurst Date: Tue, 31 Dec 2024 14:40:53 -0500 Subject: [PATCH 10/41] Original tests all re-instated Signed-off-by: Peter Broadhurst --- core/go/internal/msgs/en_errors.go | 2 +- core/go/internal/transportmgr/transport.go | 10 ++++++++- .../internal/transportmgr/transport_test.go | 21 +++---------------- 3 files changed, 13 insertions(+), 20 deletions(-) diff --git a/core/go/internal/msgs/en_errors.go b/core/go/internal/msgs/en_errors.go index acf3a3bb4..5807cc741 100644 --- a/core/go/internal/msgs/en_errors.go +++ b/core/go/internal/msgs/en_errors.go @@ -364,7 +364,7 @@ var ( MsgTransportInvalidReplyToSend = ffe("PD012008", "Message has invalid replyTo destination: '%s'") MsgTransportInvalidLocalNode = ffe("PD012009", "Node '%s' is the local node") MsgTransportClientAlreadyRegistered = ffe("PD012010", "Client 
'%s' already registered") - MsgTransportDestinationNotFound = ffe("PD012011", "Destination '%s' not found") + MsgTransportComponentNotFound = ffe("PD012011", "Component '%s' not found") MsgTransportClientRegisterAfterStartup = ffe("PD012012", "Client '%s' attempted registration after startup") MsgTransportUnsupportedReliableMsg = ffe("PD012013", "Unsupported reliable message type '%s'") MsgTransportStateNotAvailableLocally = ffe("PD012014", "State not available locally: domain=%s,contract=%s,id=%s") diff --git a/core/go/internal/transportmgr/transport.go b/core/go/internal/transportmgr/transport.go index 01b6f3353..35a6b903e 100644 --- a/core/go/internal/transportmgr/transport.go +++ b/core/go/internal/transportmgr/transport.go @@ -143,6 +143,14 @@ func (t *transport) ReceiveMessage(ctx context.Context, req *prototk.ReceiveMess return nil, i18n.NewError(ctx, msgs.MsgTransportInvalidMessage) } + if msg.CorrelationId != nil { + _, err := uuid.Parse(*msg.CorrelationId) + if err != nil { + log.L(ctx).Errorf("Invalid correlationId from transport: %s", protoToJSON(msg)) + return nil, i18n.NewError(ctx, msgs.MsgTransportInvalidMessage) + } + } + log.L(ctx).Debugf("transport %s message received id=%s (cid=%s)", t.name, msgID, tktypes.StrOrEmpty(msg.CorrelationId)) if log.IsTraceEnabled() { log.L(ctx).Tracef("transport %s message received: %s", t.name, protoToJSON(msg)) @@ -163,7 +171,7 @@ func (t *transport) deliverMessage(ctx context.Context, component prototk.Paladi receiver, found := t.tm.components[component] if !found { log.L(ctx).Errorf("Component not found: %s", component) - return i18n.NewError(ctx, msgs.MsgTransportDestinationNotFound, component.String()) + return i18n.NewError(ctx, msgs.MsgTransportComponentNotFound, component.String()) } receiver.HandlePaladinMsg(ctx, msg) diff --git a/core/go/internal/transportmgr/transport_test.go b/core/go/internal/transportmgr/transport_test.go index eac7e2fea..f636bc8f3 100644 --- a/core/go/internal/transportmgr/transport_test.go +++ b/core/go/internal/transportmgr/transport_test.go @@ -308,7 +308,7 @@ func TestReceiveMessage(t *testing.T) { ctx, _, tp, done := newTestTransport(t, false, func(mc *mockComponents) components.TransportClient { receivingClient := componentmocks.NewTransportClient(t) - receivingClient.On("Destination").Return("receivingClient1") + receivingClient.On("Destination").Return(prototk.PaladinMsg_TRANSACTION_ENGINE) receivingClient.On("HandlePaladinMsg", mock.Anything, mock.Anything).Return().Run(func(args mock.Arguments) { receivedMessages <- args[1].(*prototk.PaladinMsg) }) @@ -399,35 +399,20 @@ func TestReceiveMessageNoPayload(t *testing.T) { assert.Regexp(t, "PD012000", err) } -func TestReceiveMessageWrongNode(t *testing.T) { - ctx, _, tp, done := newTestTransport(t, false) - defer done() - - msg := &prototk.PaladinMsg{ - Component: prototk.PaladinMsg_TRANSACTION_ENGINE, - MessageType: "myMessageType", - Payload: []byte("some data"), - } - _, err := tp.t.ReceiveMessage(ctx, &prototk.ReceiveMessageRequest{ - Message: msg, - }) - assert.Regexp(t, "PD012005", err) -} - func TestReceiveMessageBadDestination(t *testing.T) { ctx, _, tp, done := newTestTransport(t, false) defer done() msg := &prototk.PaladinMsg{ MessageId: uuid.NewString(), - Component: prototk.PaladinMsg_TRANSACTION_ENGINE, + Component: prototk.PaladinMsg_Component(42), MessageType: "myMessageType", Payload: []byte("some data"), } _, err := tp.t.ReceiveMessage(ctx, &prototk.ReceiveMessageRequest{ Message: msg, }) - assert.Regexp(t, "PD012005", err) + 
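The added correlation ID check mirrors the message ID check: an optional correlation ID must still parse as a UUID when present. The same guard in isolation (a sketch; the helper name is illustrative):

// Absent is fine; present-but-malformed is rejected before delivery.
func validCorrelationID(cid *string) bool {
	if cid == nil {
		return true
	}
	_, err := uuid.Parse(*cid)
	return err == nil
}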
assert.Regexp(t, "PD012011", err) } func TestReceiveMessageBadMsgID(t *testing.T) { From 92bd1f136bb96582b51870da0786b7347ef7abc7 Mon Sep 17 00:00:00 2001 From: Peter Broadhurst Date: Tue, 31 Dec 2024 15:25:30 -0500 Subject: [PATCH 11/41] Reconcile logic for node target validation Signed-off-by: Peter Broadhurst --- core/go/internal/components/transportmgr.go | 9 +- core/go/internal/msgs/en_errors.go | 3 +- core/go/internal/transportmgr/manager.go | 3 +- core/go/internal/transportmgr/peer.go | 11 ++- .../internal/transportmgr/transport_test.go | 82 ++++++++++++++++--- 5 files changed, 89 insertions(+), 19 deletions(-) diff --git a/core/go/internal/components/transportmgr.go b/core/go/internal/components/transportmgr.go index 152cf71c7..f0ba2d03f 100644 --- a/core/go/internal/components/transportmgr.go +++ b/core/go/internal/components/transportmgr.go @@ -42,8 +42,15 @@ const ( RMTReceipt ReliableMessageType = "receipt" ) +func (t ReliableMessageType) Enum() tktypes.Enum[ReliableMessageType] { + return tktypes.Enum[ReliableMessageType](t) +} + func (t ReliableMessageType) Options() []string { - return []string{} + return []string{ + string(RMTState), + string(RMTReceipt), + } } type ReliableMessage struct { diff --git a/core/go/internal/msgs/en_errors.go b/core/go/internal/msgs/en_errors.go index 5807cc741..d326b6562 100644 --- a/core/go/internal/msgs/en_errors.go +++ b/core/go/internal/msgs/en_errors.go @@ -360,7 +360,7 @@ var ( MsgTransportNoTransportsConfiguredForNode = ffe("PD012003", "None of the transports registered by node '%s' are configured locally on this node: %v") MsgTransportDetailsNotAvailable = ffe("PD012004", "Transport '%s' not available for node '%s'") MsgTransportInvalidReplyToReceived = ffe("PD012006", "Message received with invalid replyTo destination: '%s'") - MsgTransportInvalidDestinationSend = ffe("PD012007", "Message has invalid destination for sending from local node '%s': '%s'") + MsgTransportSendLocalNode = ffe("PD012007", "Attempt to send message to local node '%s'") MsgTransportInvalidReplyToSend = ffe("PD012008", "Message has invalid replyTo destination: '%s'") MsgTransportInvalidLocalNode = ffe("PD012009", "Node '%s' is the local node") MsgTransportClientAlreadyRegistered = ffe("PD012010", "Client '%s' already registered") @@ -369,6 +369,7 @@ var ( MsgTransportUnsupportedReliableMsg = ffe("PD012013", "Unsupported reliable message type '%s'") MsgTransportStateNotAvailableLocally = ffe("PD012014", "State not available locally: domain=%s,contract=%s,id=%s") MsgTransportInvalidPeerInfo = ffe("PD012015", "Invalid peer info JSON returned by plugin") + MsgTransportInvalidTargetNode = ffe("PD012016", "Invalid target node '%s'") // RegistryManager module PD0121XX MsgRegistryNodeEntiresNotFound = ffe("PD012100", "No entries found for node '%s'") diff --git a/core/go/internal/transportmgr/manager.go b/core/go/internal/transportmgr/manager.go index 812c0ab5d..4c4ac0d74 100644 --- a/core/go/internal/transportmgr/manager.go +++ b/core/go/internal/transportmgr/manager.go @@ -229,8 +229,7 @@ func (tm *transportManager) LocalNodeName() string { func (tm *transportManager) Send(ctx context.Context, send *components.FireAndForgetMessageSend) error { // Check the message is valid - if len(send.MessageType) == 0 || - len(send.Payload) == 0 { + if len(send.Payload) == 0 { log.L(ctx).Errorf("Invalid message send request %+v", send) return i18n.NewError(ctx, msgs.MsgTransportInvalidMessage) } diff --git a/core/go/internal/transportmgr/peer.go 
b/core/go/internal/transportmgr/peer.go index 676daa3a5..01ceb4270 100644 --- a/core/go/internal/transportmgr/peer.go +++ b/core/go/internal/transportmgr/peer.go @@ -79,6 +79,13 @@ func (tm *transportManager) getActivePeer(nodeName string) *peer { func (tm *transportManager) getPeer(ctx context.Context, nodeName string) (*peer, error) { + if err := tktypes.ValidateSafeCharsStartEndAlphaNum(ctx, nodeName, tktypes.DefaultNameMaxLen, "node"); err != nil { + return nil, i18n.WrapError(ctx, err, msgs.MsgTransportInvalidTargetNode, nodeName) + } + if nodeName == tm.localNodeName { + return nil, i18n.NewError(ctx, msgs.MsgTransportSendLocalNode, tm.localNodeName) + } + // Hopefully this is an already active connection p := tm.getActivePeer(nodeName) if p != nil { @@ -109,10 +116,6 @@ func (tm *transportManager) getPeer(ctx context.Context, nodeName string) (*peer p.ctx, p.cancelCtx = context.WithCancel( log.WithLogField(tm.bgCtx /* go-routine need bg context*/, "peer", nodeName)) - if nodeName == "" || nodeName == tm.localNodeName { - return nil, i18n.NewError(p.ctx, msgs.MsgTransportInvalidDestinationSend, tm.localNodeName, nodeName) - } - // Note the registry is responsible for caching to make this call as efficient as if // we maintained the transport details in-memory ourselves. registeredTransportDetails, err := tm.registryManager.GetNodeTransports(p.ctx, nodeName) diff --git a/core/go/internal/transportmgr/transport_test.go b/core/go/internal/transportmgr/transport_test.go index f636bc8f3..be5d27eef 100644 --- a/core/go/internal/transportmgr/transport_test.go +++ b/core/go/internal/transportmgr/transport_test.go @@ -17,6 +17,7 @@ package transportmgr import ( "context" + "database/sql/driver" "fmt" "sync/atomic" "testing" @@ -134,19 +135,21 @@ func mockActivateDeactivateOk(tp *testPlugin) { } } +func mockGoodTransport(mc *mockComponents) components.TransportClient { + mc.registryManager.On("GetNodeTransports", mock.Anything, "node2").Return([]*components.RegistryNodeTransportEntry{ + { + Node: "node2", + Transport: "test1", + Details: `{"likely":"json stuff"}`, + }, + }, nil) + return nil +} + func TestSendMessage(t *testing.T) { ctx, tm, tp, done := newTestTransport(t, false, mockEmptyReliableMsgs, - func(mc *mockComponents) components.TransportClient { - mc.registryManager.On("GetNodeTransports", mock.Anything, "node2").Return([]*components.RegistryNodeTransportEntry{ - { - Node: "node2", - Transport: "test1", - Details: `{"likely":"json stuff"}`, - }, - }, nil) - return nil - }) + mockGoodTransport) defer done() message := testMessage() @@ -284,7 +287,7 @@ func TestSendMessageDestWrong(t *testing.T) { message.Component = prototk.PaladinMsg_TRANSACTION_ENGINE message.Node = "" err := tm.Send(ctx, message) - assert.Regexp(t, "PD012007", err) + assert.Regexp(t, "PD012016", err) message.Component = prototk.PaladinMsg_TRANSACTION_ENGINE message.Node = "node1" @@ -446,3 +449,60 @@ func TestReceiveMessageBadCorrelID(t *testing.T) { }) assert.Regexp(t, "PD012000", err) } + +func TestSendContextClosed(t *testing.T) { + ctx, tm, tp, done := newTestTransport(t, false) + done() + + tm.peers = map[string]*peer{ + "node2": { + transport: tp.t, + sendQueue: make(chan *prototk.PaladinMsg), + }, + } + + err := tm.Send(ctx, testMessage()) + assert.Regexp(t, "PD010301", err) + +} + +func TestSendReliableOk(t *testing.T) { + ctx, tm, tp, done := newTestTransport(t, false, + mockGoodTransport, + func(mc *mockComponents) components.TransportClient { + 
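getPeer above validates the target name, tries the fast path under the read lock, and only then races for the write lock, re-checking before constructing so two goroutines dialing the same node can never both activate it. The shape of that double-checked get-or-create, reduced to plain types (illustrative, not the real peer struct):

type conn struct{ name string }

type registry struct {
	mu    sync.RWMutex
	conns map[string]*conn
}

func (r *registry) getOrCreate(name string) *conn {
	r.mu.RLock()
	c := r.conns[name]
	r.mu.RUnlock()
	if c != nil {
		return c // common case: already connected
	}
	r.mu.Lock()
	defer r.mu.Unlock()
	if c := r.conns[name]; c != nil {
		return c // another goroutine won the race to create it
	}
	c = &conn{name: name}
	r.conns[name] = c
	return c
}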
mc.db.Mock.ExpectExec("INSERT.*reliable_msgs").WillReturnResult(driver.ResultNoRows) + return nil + }, + ) + defer done() + + mockActivateDeactivateOk(tp) + pc, err := tm.SendReliable(ctx, tm.persistence.DB(), &components.ReliableMessage{ + Node: "node2", + MessageType: components.RMTState.Enum(), + Metadata: []byte(`{"some":"data"}`), + }) + require.NoError(t, err) + pc() + +} + +func TestSendReliableFail(t *testing.T) { + ctx, tm, tp, done := newTestTransport(t, false, + mockGoodTransport, + func(mc *mockComponents) components.TransportClient { + mc.db.Mock.ExpectExec("INSERT.*reliable_msgs").WillReturnError(fmt.Errorf("pop")) + return nil + }, + ) + defer done() + + mockActivateDeactivateOk(tp) + _, err := tm.SendReliable(ctx, tm.persistence.DB(), &components.ReliableMessage{ + Node: "node2", + MessageType: components.RMTState.Enum(), + Metadata: []byte(`{"some":"data"}`), + }) + require.Regexp(t, "pop", err) + +} From a8fa88b4e7c253cf55525d5756b1873f8d5b5e12 Mon Sep 17 00:00:00 2001 From: Peter Broadhurst Date: Tue, 31 Dec 2024 17:52:14 -0500 Subject: [PATCH 12/41] First real DB test of reliable re-delivery Signed-off-by: Peter Broadhurst --- .../000014_peer_queued_messages.up.sql | 12 +- .../sqlite/000014_peer_queued_messages.up.sql | 2 + core/go/internal/components/transportmgr.go | 12 +- core/go/internal/transportmgr/manager.go | 13 ++ core/go/internal/transportmgr/manager_test.go | 8 +- core/go/internal/transportmgr/peer.go | 41 ++-- core/go/internal/transportmgr/peer_test.go | 215 ++++++++++++++++++ .../internal/transportmgr/transport_test.go | 8 - 8 files changed, 269 insertions(+), 42 deletions(-) create mode 100644 core/go/internal/transportmgr/peer_test.go diff --git a/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql b/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql index aef4428d2..72c5413a1 100644 --- a/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql +++ b/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql @@ -5,14 +5,16 @@ DROP TABLE state_distribution_acknowledgments; DROP TABLE state_distributions; CREATE TABLE reliable_msgs ( - "id" UUID NOT NULL, - "created" BIGINT NOT NULL, - "node" TEXT NOT NULL, - "msg_type" TEXT NOT NULL, - "metadata" TEXT NOT NULL, + "sequence" BIGINT GENERATED ALWAYS AS IDENTITY, + "id" UUID NOT NULL, + "created" BIGINT NOT NULL, + "node" TEXT NOT NULL, + "msg_type" TEXT NOT NULL, + "metadata" TEXT NOT NULL, PRIMARY KEY ("id") ); +CREATE INDEX reliable_msgs_id ON reliable_msgs ("id"); CREATE INDEX reliable_msgs_node ON reliable_msgs ("node"); CREATE INDEX reliable_msgs_created ON reliable_msgs ("created"); diff --git a/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql b/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql index aef4428d2..bb69c630f 100644 --- a/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql +++ b/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql @@ -5,6 +5,7 @@ DROP TABLE state_distribution_acknowledgments; DROP TABLE state_distributions; CREATE TABLE reliable_msgs ( + "sequence" INTEGER PRIMARY KEY AUTOINCREMENT, "id" UUID NOT NULL, "created" BIGINT NOT NULL, "node" TEXT NOT NULL, @@ -13,6 +14,7 @@ CREATE TABLE reliable_msgs ( PRIMARY KEY ("id") ); +CREATE INDEX reliable_msgs_id ON reliable_msgs ("id"); CREATE INDEX reliable_msgs_node ON reliable_msgs ("node"); CREATE INDEX reliable_msgs_created ON reliable_msgs ("created"); diff --git a/core/go/internal/components/transportmgr.go 
b/core/go/internal/components/transportmgr.go index f0ba2d03f..23e767759 100644 --- a/core/go/internal/components/transportmgr.go +++ b/core/go/internal/components/transportmgr.go @@ -54,10 +54,10 @@ func (t ReliableMessageType) Options() []string { } type ReliableMessage struct { - ID uuid.UUID `json:"id" gorm:"column:id,primaryKey"` - Created tktypes.Timestamp `json:"created" gorm:"column:created,autoCreateTime:false"` // generated in our code + Sequence uint64 `json:"sequence" gorm:"column:sequence;primaryKey"` + ID uuid.UUID `json:"id" gorm:"column:id"` + Created tktypes.Timestamp `json:"created" gorm:"column:created;autoCreateTime:false"` // generated in our code Node string `json:"node" gorm:"column:node"` // The node id to send the message to - ReplyTo string `json:"replyTo" gorm:"column:reply_to"` // The identity to respond to on the sending node MessageType tktypes.Enum[ReliableMessageType] `json:"messageType" gorm:"column:msg_type"` Metadata tktypes.RawJSON `json:"metadata" gorm:"column:metadata"` Ack *ReliableMessageAck `json:"ack,omitempty" gorm:"foreignKey:MessageID;references:ID;"` @@ -68,9 +68,9 @@ func (rm ReliableMessage) TableName() string { } type ReliableMessageAck struct { - MessageID uuid.UUID `json:"-" gorm:"column:id,primaryKey"` - Time tktypes.Timestamp `json:"time,omitempty" gorm:"column:time,autoCreateTime:false"` // generated in our code - Error string `json:"error,omitempty" gorm:"column:error,autoCreateTime:false"` + MessageID uuid.UUID `json:"-" gorm:"column:id;primaryKey"` + Time tktypes.Timestamp `json:"time,omitempty" gorm:"column:time;autoCreateTime:false"` // generated in our code + Error string `json:"error,omitempty" gorm:"column:error"` } func (rma ReliableMessageAck) TableName() string { diff --git a/core/go/internal/transportmgr/manager.go b/core/go/internal/transportmgr/manager.go index 4c4ac0d74..56e5ea7d3 100644 --- a/core/go/internal/transportmgr/manager.go +++ b/core/go/internal/transportmgr/manager.go @@ -28,6 +28,7 @@ import ( "github.com/kaleido-io/paladin/core/internal/msgs" "github.com/kaleido-io/paladin/core/pkg/persistence" "gorm.io/gorm" + "gorm.io/gorm/clause" "github.com/kaleido-io/paladin/toolkit/pkg/log" "github.com/kaleido-io/paladin/toolkit/pkg/plugintk" @@ -299,3 +300,15 @@ func (tm *transportManager) SendReliable(ctx context.Context, dbTX *gorm.DB, msg return p.notifyPersistedMsgAvailable, nil } + +func (tm *transportManager) writeAcks(ctx context.Context, dbTX *gorm.DB, acks ...*components.ReliableMessageAck) error { + for _, ack := range acks { + log.L(ctx).Infof("ack received for message %s", ack.MessageID) + ack.Time = tktypes.TimestampNow() + } + return dbTX. + WithContext(ctx). + Clauses(clause.OnConflict{DoNothing: true}). + Create(acks). 
+ Error +} diff --git a/core/go/internal/transportmgr/manager_test.go b/core/go/internal/transportmgr/manager_test.go index 6c1a80714..3e447d8c4 100644 --- a/core/go/internal/transportmgr/manager_test.go +++ b/core/go/internal/transportmgr/manager_test.go @@ -99,9 +99,11 @@ func newTestTransportManager(t *testing.T, realDB bool, conf *pldconf.TransportM assert.Equal(t, conf.NodeName, tm.LocalNodeName()) return ctx, tm.(*transportManager), mc, func() { - logrus.SetLevel(oldLevel) - cancelCtx() - tm.Stop() + if !t.Failed() { + logrus.SetLevel(oldLevel) + cancelCtx() + tm.Stop() + } } } diff --git a/core/go/internal/transportmgr/peer.go b/core/go/internal/transportmgr/peer.go index 01ceb4270..d2f5cd4bd 100644 --- a/core/go/internal/transportmgr/peer.go +++ b/core/go/internal/transportmgr/peer.go @@ -45,7 +45,7 @@ type peer struct { // Send loop state (no lock as only used on the loop) lastFullScan time.Time - lastDrainHWM *tktypes.Timestamp + lastDrainHWM *uint64 persistentMsgsDrained bool quiescing bool @@ -94,10 +94,14 @@ func (tm *transportManager) getPeer(ctx context.Context, nodeName string) (*peer return p, nil } - // Otherwise take the write-lock and race to connect + return tm.connectPeer(ctx, nodeName) +} + +func (tm *transportManager) connectPeer(ctx context.Context, nodeName string) (*peer, error) { + // Race to grab the write-lock and race to connect tm.peersLock.Lock() defer tm.peersLock.Unlock() - p = tm.peers[nodeName] + p := tm.peers[nodeName] if p != nil { // There was a race to connect to this peer, and the other routine won log.L(ctx).Debugf("connection already active for peer '%s' (after connection race)", nodeName) @@ -166,16 +170,6 @@ func (p *peer) notifyPersistedMsgAvailable() { } } -func (p *peer) stateDistributionMsg(rm *components.ReliableMessage, sd *components.StateDistributionWithData) *prototk.PaladinMsg { - payload, _ := json.Marshal(sd) - return &prototk.PaladinMsg{ - MessageId: rm.ID.String(), - MessageType: "StateProducedEvent", - Payload: payload, - Component: prototk.PaladinMsg_TRANSACTION_ENGINE, - } -} - func (p *peer) send(msg *prototk.PaladinMsg) error { return p.tm.sendShortRetry.Do(p.ctx, func(attempt int) (retryable bool, err error) { return true, p.transport.send(p.ctx, p.name, msg) @@ -236,18 +230,18 @@ func (p *peer) reliableMessageScan() error { const pageSize = 100 var total = 0 - var lastPageEnd *tktypes.Timestamp + var lastPageEnd *uint64 for { query := p.tm.persistence.DB(). WithContext(p.ctx). - Order("created ASC"). + Order("sequence ASC"). Joins("Ack"). Where(`"Ack"."time" IS NULL`). 
Limit(pageSize) if lastPageEnd != nil { - query = query.Where("created > ?", *lastPageEnd) + query = query.Where("sequence > ?", *lastPageEnd) } else if !fullScan { - query = query.Where("created > ?", *p.lastDrainHWM) + query = query.Where("sequence > ?", *p.lastDrainHWM) } var page []*components.ReliableMessage @@ -266,7 +260,7 @@ func (p *peer) reliableMessageScan() error { if len(page) > 0 { p.persistentMsgsDrained = false // we know there's some messages total += len(page) - lastPageEnd = &page[len(page)-1].Created + lastPageEnd = &page[len(page)-1].Sequence } // If we didn't have a full page, then we're done @@ -322,8 +316,14 @@ func (p *peer) buildStateDistributionMsg(rm *components.ReliableMessage) (*proto i18n.NewError(p.ctx, msgs.MsgTransportStateNotAvailableLocally, sd.Domain, *contractAddr, stateID), nil } + sd.StateData = state.Data - return nil, nil, nil + return &prototk.PaladinMsg{ + MessageId: rm.ID.String(), + Component: prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER, + MessageType: string(rm.MessageType), + Payload: tktypes.JSONString(sd), + }, nil, nil } func (p *peer) processReliableMsgPage(page []*components.ReliableMessage) (err error) { @@ -334,7 +334,7 @@ func (p *peer) processReliableMsgPage(page []*components.ReliableMessage) (err e for _, rm := range page { // Check it's either after our HWM, or eligible for re-send - afterHWM := p.lastDrainHWM == nil || *p.lastDrainHWM < rm.Created + afterHWM := p.lastDrainHWM == nil || *p.lastDrainHWM < rm.Sequence if !afterHWM && time.Since(rm.Created.Time()) < p.tm.reliableMessageResend { log.L(p.ctx).Infof("Unacknowledged message %s not yet eligible for re-send", rm.ID) continue @@ -356,6 +356,7 @@ func (p *peer) processReliableMsgPage(page []*components.ReliableMessage) (err e case err != nil: return err case errorAck != nil: + log.L(p.ctx).Errorf("Unable to send reliable message %s - writing persistent error: %s", rm.ID, errorAck) errorAcks = append(errorAcks, &components.ReliableMessageAck{ MessageID: rm.ID, Time: tktypes.TimestampNow(), diff --git a/core/go/internal/transportmgr/peer_test.go b/core/go/internal/transportmgr/peer_test.go new file mode 100644 index 000000000..dbe769b47 --- /dev/null +++ b/core/go/internal/transportmgr/peer_test.go @@ -0,0 +1,215 @@ +/* + * Copyright © 2024 Kaleido, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
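The high-water-mark check above combines the two sweep modes: rows past the last drained sequence are new and always sent, while rows at or below it are re-sent only once the resend interval has elapsed. As a single predicate (a sketch; the logic follows the diff, the function name is illustrative):

// Send this pass if the row is new (beyond the drained sequence HWM) or old
// enough that the previous send is presumed lost.
func shouldSend(lastDrainHWM *uint64, seq uint64, created time.Time, resend time.Duration) bool {
	afterHWM := lastDrainHWM == nil || *lastDrainHWM < seq
	return afterHWM || time.Since(created) >= resend
}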
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +package transportmgr + +import ( + "context" + "encoding/json" + "fmt" + "sort" + "testing" + "time" + + "github.com/kaleido-io/paladin/config/pkg/confutil" + "github.com/kaleido-io/paladin/config/pkg/pldconf" + "github.com/kaleido-io/paladin/core/internal/components" + "github.com/kaleido-io/paladin/toolkit/pkg/pldapi" + "github.com/kaleido-io/paladin/toolkit/pkg/prototk" + "github.com/kaleido-io/paladin/toolkit/pkg/retry" + "github.com/kaleido-io/paladin/toolkit/pkg/tktypes" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + "gorm.io/gorm" +) + +func TestReliableMessageResendRealDB(t *testing.T) { + + ctx, tm, tp, done := newTestTransport(t, true, + mockGoodTransport, + func(mc *mockComponents) components.TransportClient { + mGS := mc.stateManager.On("GetState", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, false, false) + mGS.Run(func(args mock.Arguments) { + mGS.Return(&pldapi.State{ + StateBase: pldapi.StateBase{ + DomainName: args[2].(string), + ContractAddress: args[3].(tktypes.EthAddress), + ID: args[4].(tktypes.HexBytes), + Data: []byte(fmt.Sprintf(`{"dataFor": "%s"}`, args[4].(tktypes.HexBytes).HexString())), + }, + }, nil) + }) + return nil + }, + ) + defer done() + + tm.sendShortRetry = retry.NewRetryLimited(&pldconf.RetryConfigWithMax{ + MaxAttempts: confutil.P(1), + }) + tm.quiesceTimeout = 10 * time.Millisecond + tm.reliableMessageResend = 10 * time.Millisecond + tm.peerInactivityTimeout = 1 * time.Second + + mockActivateDeactivateOk(tp) + + sentMessages := make(chan *prototk.PaladinMsg) + tp.Functions.SendMessage = func(ctx context.Context, req *prototk.SendMessageRequest) (*prototk.SendMessageResponse, error) { + sent := req.Message + sentMessages <- sent + return nil, nil + } + + sds := make([]*components.StateDistribution, 2) + postCommits := make([]func(), 0) + _ = tm.persistence.DB().Transaction(func(dbTX *gorm.DB) error { + for i := 0; i < len(sds); i++ { + sds[i] = &components.StateDistribution{ + Domain: "domain1", + ContractAddress: tktypes.RandAddress().String(), + StateID: tktypes.RandHex(32), + } + + postCommit, err := tm.SendReliable(ctx, dbTX, &components.ReliableMessage{ + MessageType: components.RMTState.Enum(), + Node: "node2", + Metadata: tktypes.JSONString(sds[i]), + }) + require.NoError(t, err) + postCommits = append(postCommits, postCommit) + } + return nil + }) + for _, pc := range postCommits { + pc() + } + + // Check we get the two messages twice, with the send retry kicking in + for i := 0; i < 2; i++ { + for iSD := 0; iSD < len(sds); iSD++ { + msg := <-sentMessages + var receivedSD components.StateDistributionWithData + err := json.Unmarshal(msg.Payload, &receivedSD) + require.NoError(t, err) + require.Equal(t, sds[iSD], &receivedSD.StateDistribution) + var receivedState pldapi.State + err = json.Unmarshal(receivedSD.StateData, &receivedState) + require.NoError(t, err) + require.JSONEq(t, fmt.Sprintf(`{"dataFor": "%s"}`, receivedSD.StateID), string(receivedSD.StateData)) + } + } + + // From this point on we just drain + go func() { + for range sentMessages { + } + }() + + // Close the peer + tm.peers["node2"].close() + + // Clean up the routine + close(sentMessages) + +} + +func TestNameSortedPeers(t *testing.T) { + + peerList := nameSortedPeers{ + {name: "ccc"}, + {name: "aaa"}, + {name: "ddd"}, + {name: "bbb"}, + } + + sort.Sort(peerList) + + require.Equal(t, nameSortedPeers{ + {name: "aaa"}, + {name: 
"bbb"}, + {name: "ccc"}, + {name: "ddd"}, + }, peerList) + +} + +func TestConnectionRace(t *testing.T) { + + connWaiting := make(chan struct{}) + connRelease := make(chan struct{}) + + ctx, tm, tp, done := newTestTransport(t, false, + func(mc *mockComponents) components.TransportClient { + mGNT := mc.registryManager.On("GetNodeTransports", mock.Anything, "node2").Return([]*components.RegistryNodeTransportEntry{ + { + Node: "node2", + Transport: "test1", + Details: `{"likely":"json stuff"}`, + }, + }, nil) + mGNT.Run(func(args mock.Arguments) { + close(connWaiting) + <-connRelease + }) + return nil + }, + ) + defer done() + + mockActivateDeactivateOk(tp) + connDone := make(chan bool) + for i := 0; i < 2; i++ { + go func() { + _, err := tm.connectPeer(ctx, "node2") + require.NoError(t, err) + connDone <- true + }() + } + <-connWaiting + time.Sleep(10 * time.Millisecond) + close(connRelease) + <-connDone + <-connDone + +} + +func TestActivateFail(t *testing.T) { + + ctx, tm, tp, done := newTestTransport(t, false, mockGoodTransport) + defer done() + + tp.Functions.ActivateNode = func(ctx context.Context, anr *prototk.ActivateNodeRequest) (*prototk.ActivateNodeResponse, error) { + return nil, fmt.Errorf("pop") + } + + _, err := tm.getPeer(ctx, "node2") + assert.Regexp(t, "pop", err) + +} + +func TestActivateBadPeerInfo(t *testing.T) { + + ctx, tm, tp, done := newTestTransport(t, false, mockGoodTransport) + defer done() + + tp.Functions.ActivateNode = func(ctx context.Context, anr *prototk.ActivateNodeRequest) (*prototk.ActivateNodeResponse, error) { + return &prototk.ActivateNodeResponse{PeerInfoJson: ""}, nil + } + + _, err := tm.getPeer(ctx, "node2") + assert.Regexp(t, "PD012015", err) + +} diff --git a/core/go/internal/transportmgr/transport_test.go b/core/go/internal/transportmgr/transport_test.go index be5d27eef..9ca8850f6 100644 --- a/core/go/internal/transportmgr/transport_test.go +++ b/core/go/internal/transportmgr/transport_test.go @@ -161,14 +161,6 @@ func TestSendMessage(t *testing.T) { assert.NotEmpty(t, sent.MessageId) assert.Equal(t, message.CorrelationID.String(), *sent.CorrelationId) assert.Equal(t, message.Payload, sent.Payload) - - // ... 
if we didn't have a connection established we'd expect to come back to request the details - gtdr, err := tp.t.GetTransportDetails(ctx, &prototk.GetTransportDetailsRequest{ - Node: "node2", - }) - require.NoError(t, err) - assert.NotEmpty(t, gtdr.TransportDetails) - sentMessages <- sent return nil, nil } From 01f1fa7624776929de0656312cc8efb4192057bb Mon Sep 17 00:00:00 2001 From: Peter Broadhurst Date: Wed, 1 Jan 2025 18:41:11 -0500 Subject: [PATCH 13/41] Work through additional scenarios Signed-off-by: Peter Broadhurst --- .../000014_peer_queued_messages.up.sql | 2 +- .../sqlite/000014_peer_queued_messages.up.sql | 2 +- core/go/internal/msgs/en_errors.go | 1 + core/go/internal/transportmgr/manager.go | 16 +++ core/go/internal/transportmgr/peer.go | 2 +- core/go/internal/transportmgr/peer_test.go | 136 ++++++++++++++++-- 6 files changed, 142 insertions(+), 17 deletions(-) diff --git a/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql b/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql index 72c5413a1..76fa86251 100644 --- a/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql +++ b/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql @@ -10,7 +10,7 @@ CREATE TABLE reliable_msgs ( "created" BIGINT NOT NULL, "node" TEXT NOT NULL, "msg_type" TEXT NOT NULL, - "metadata" TEXT NOT NULL, + "metadata" TEXT , PRIMARY KEY ("id") ); diff --git a/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql b/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql index bb69c630f..741e427e9 100644 --- a/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql +++ b/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql @@ -10,7 +10,7 @@ CREATE TABLE reliable_msgs ( "created" BIGINT NOT NULL, "node" TEXT NOT NULL, "msg_type" TEXT NOT NULL, - "metadata" TEXT NOT NULL, + "metadata" TEXT , PRIMARY KEY ("id") ); diff --git a/core/go/internal/msgs/en_errors.go b/core/go/internal/msgs/en_errors.go index d326b6562..cc09c4857 100644 --- a/core/go/internal/msgs/en_errors.go +++ b/core/go/internal/msgs/en_errors.go @@ -370,6 +370,7 @@ var ( MsgTransportStateNotAvailableLocally = ffe("PD012014", "State not available locally: domain=%s,contract=%s,id=%s") MsgTransportInvalidPeerInfo = ffe("PD012015", "Invalid peer info JSON returned by plugin") MsgTransportInvalidTargetNode = ffe("PD012016", "Invalid target node '%s'") + MsgTransportInvalidMessageData = ffe("PD012017", "Invalid data for message %s") // RegistryManager module PD0121XX MsgRegistryNodeEntiresNotFound = ffe("PD012100", "No entries found for node '%s'") diff --git a/core/go/internal/transportmgr/manager.go b/core/go/internal/transportmgr/manager.go index 56e5ea7d3..8b320c9a9 100644 --- a/core/go/internal/transportmgr/manager.go +++ b/core/go/internal/transportmgr/manager.go @@ -312,3 +312,19 @@ func (tm *transportManager) writeAcks(ctx context.Context, dbTX *gorm.DB, acks . Create(acks). Error } + +func (tm *transportManager) getReliableMessageByID(ctx context.Context, dbTX *gorm.DB, id uuid.UUID) (*components.ReliableMessage, error) { + var rms []*components.ReliableMessage + err := dbTX. + WithContext(ctx). + Order("sequence ASC"). + Joins("Ack"). + Where(`"reliable_msgs"."id" = ?`, id). + Limit(1). + Find(&rms). 
+ Error + if err != nil || len(rms) < 1 { + return nil, err + } + return rms[0], nil +} diff --git a/core/go/internal/transportmgr/peer.go b/core/go/internal/transportmgr/peer.go index d2f5cd4bd..c3143c405 100644 --- a/core/go/internal/transportmgr/peer.go +++ b/core/go/internal/transportmgr/peer.go @@ -303,7 +303,7 @@ func (p *peer) buildStateDistributionMsg(rm *components.ReliableMessage) (*proto contractAddr, parseErr = tktypes.ParseEthAddress(sd.ContractAddress) } if parseErr != nil { - return nil, parseErr, nil + return nil, i18n.WrapError(p.ctx, parseErr, msgs.MsgTransportInvalidMessageData, rm.ID), nil } // Get the state - distinguishing between not found, vs. a retryable error diff --git a/core/go/internal/transportmgr/peer_test.go b/core/go/internal/transportmgr/peer_test.go index dbe769b47..6d801b3b2 100644 --- a/core/go/internal/transportmgr/peer_test.go +++ b/core/go/internal/transportmgr/peer_test.go @@ -23,6 +23,7 @@ import ( "testing" "time" + "github.com/google/uuid" "github.com/kaleido-io/paladin/config/pkg/confutil" "github.com/kaleido-io/paladin/config/pkg/pldconf" "github.com/kaleido-io/paladin/core/internal/components" @@ -36,24 +37,26 @@ import ( "gorm.io/gorm" ) +func mockGetStateOk(mc *mockComponents) components.TransportClient { + mGS := mc.stateManager.On("GetState", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, false, false) + mGS.Run(func(args mock.Arguments) { + mGS.Return(&pldapi.State{ + StateBase: pldapi.StateBase{ + DomainName: args[2].(string), + ContractAddress: args[3].(tktypes.EthAddress), + ID: args[4].(tktypes.HexBytes), + Data: []byte(fmt.Sprintf(`{"dataFor": "%s"}`, args[4].(tktypes.HexBytes).HexString())), + }, + }, nil) + }) + return nil +} + func TestReliableMessageResendRealDB(t *testing.T) { ctx, tm, tp, done := newTestTransport(t, true, mockGoodTransport, - func(mc *mockComponents) components.TransportClient { - mGS := mc.stateManager.On("GetState", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, false, false) - mGS.Run(func(args mock.Arguments) { - mGS.Return(&pldapi.State{ - StateBase: pldapi.StateBase{ - DomainName: args[2].(string), - ContractAddress: args[3].(tktypes.EthAddress), - ID: args[4].(tktypes.HexBytes), - Data: []byte(fmt.Sprintf(`{"dataFor": "%s"}`, args[4].(tktypes.HexBytes).HexString())), - }, - }, nil) - }) - return nil - }, + mockGetStateOk, ) defer done() @@ -126,6 +129,111 @@ func TestReliableMessageResendRealDB(t *testing.T) { } +func TestReliableMessageSendSendQuiesceRealDB(t *testing.T) { + + ctx, tm, tp, done := newTestTransport(t, true, + mockGoodTransport, + mockGetStateOk, + ) + defer done() + + tm.sendShortRetry = retry.NewRetryLimited(&pldconf.RetryConfigWithMax{ + MaxAttempts: confutil.P(1), + }) + tm.quiesceTimeout = 10 * time.Millisecond + tm.reliableMessageResend = 1 * time.Second + tm.peerInactivityTimeout = 10 * time.Millisecond + + mockActivateDeactivateOk(tp) + + sentMessages := make(chan *prototk.PaladinMsg) + tp.Functions.SendMessage = func(ctx context.Context, req *prototk.SendMessageRequest) (*prototk.SendMessageResponse, error) { + sent := req.Message + sentMessages <- sent + return nil, nil + } + + // Here we send two messages one at a time and check they arrive + msgIDs := make([]uuid.UUID, 2) + for i := 0; i < 2; i++ { + sd := &components.StateDistribution{ + Domain: "domain1", + ContractAddress: tktypes.RandAddress().String(), + StateID: tktypes.RandHex(32), + } + + postCommit, err := tm.SendReliable(ctx, tm.persistence.DB(), 
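// The postCommit hook returned by SendReliable exists because the insert runs in
// the caller's transaction: the peer must only be notified once the row is
// durable, otherwise it could scan before the commit lands and find nothing.
// A sketch of the intended calling pattern (names as in the tests above):
//
//	var postCommits []func()
//	err := tm.persistence.DB().Transaction(func(dbTX *gorm.DB) error {
//		pc, err := tm.SendReliable(ctx, dbTX, &components.ReliableMessage{
//			Node:        "node2",
//			MessageType: components.RMTState.Enum(),
//			Metadata:    tktypes.JSONString(sd),
//		})
//		if err == nil {
//			postCommits = append(postCommits, pc)
//		}
//		return err
//	})
//	if err == nil {
//		for _, pc := range postCommits {
//			pc() // wake the peer only after the data is committed
//		}
//	}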
&components.ReliableMessage{ + MessageType: components.RMTState.Enum(), + Node: "node2", + Metadata: tktypes.JSONString(sd), + }) + require.NoError(t, err) + postCommit() + + msg := <-sentMessages + var receivedSD components.StateDistributionWithData + err = json.Unmarshal(msg.Payload, &receivedSD) + require.NoError(t, err) + require.Equal(t, sd, &receivedSD.StateDistribution) + var receivedState pldapi.State + err = json.Unmarshal(receivedSD.StateData, &receivedState) + require.NoError(t, err) + require.JSONEq(t, fmt.Sprintf(`{"dataFor": "%s"}`, receivedSD.StateID), string(receivedSD.StateData)) + + msgIDs[i] = uuid.MustParse(msg.MessageId) + } + + // Deliver the two acks + p := tm.peers["node2"] + for _, msgID := range msgIDs { + err := tm.writeAcks(ctx, tm.persistence.DB(), &components.ReliableMessageAck{ + MessageID: msgID, + }) + require.NoError(t, err) + } + + // Wait for the peer to end via quiesce + <-p.done + +} + +func TestSendBadReliableMessageMarkedFailRealDB(t *testing.T) { + + ctx, tm, tp, done := newTestTransport(t, true, + mockGoodTransport, + ) + defer done() + + tm.sendShortRetry = retry.NewRetryLimited(&pldconf.RetryConfigWithMax{ + MaxAttempts: confutil.P(1), + }) + tm.quiesceTimeout = 10 * time.Millisecond + tm.reliableMessageResend = 10 * time.Millisecond + tm.peerInactivityTimeout = 1 * time.Second + + mockActivateDeactivateOk(tp) + + rm := &components.ReliableMessage{ + MessageType: components.RMTState.Enum(), + Node: "node2", + // Missing metadata + } + postCommit, err := tm.SendReliable(ctx, tm.persistence.DB(), rm) + require.NoError(t, err) + postCommit() + + // Wait for nack + var rmWithAck *components.ReliableMessage + for (rmWithAck == nil || rmWithAck.Ack == nil) && !t.Failed() { + time.Sleep(10 * time.Millisecond) + rmWithAck, err = tm.getReliableMessageByID(ctx, tm.persistence.DB(), rm.ID) + require.NoError(t, err) + } + require.NotNil(t, rmWithAck.Ack) + require.Regexp(t, "PD012017", rmWithAck.Ack.Error) + +} + func TestNameSortedPeers(t *testing.T) { peerList := nameSortedPeers{ From dc0ca39a493a0bd84f0209a3786122b4b7fa5ece Mon Sep 17 00:00:00 2001 From: Peter Broadhurst Date: Wed, 1 Jan 2025 20:36:02 -0500 Subject: [PATCH 14/41] Close out test of send path, ready for receive Signed-off-by: Peter Broadhurst --- core/go/internal/transportmgr/manager.go | 32 ++- core/go/internal/transportmgr/peer.go | 17 +- core/go/internal/transportmgr/peer_test.go | 265 +++++++++++++++++- .../internal/transportmgr/transport_test.go | 21 ++ 4 files changed, 307 insertions(+), 28 deletions(-) diff --git a/core/go/internal/transportmgr/manager.go b/core/go/internal/transportmgr/manager.go index 8b320c9a9..5c5b8343f 100644 --- a/core/go/internal/transportmgr/manager.go +++ b/core/go/internal/transportmgr/manager.go @@ -64,25 +64,27 @@ type transportManager struct { peerInactivityTimeout time.Duration quiesceTimeout time.Duration - senderBufferLen int - reliableMessageResend time.Duration + senderBufferLen int + reliableMessageResend time.Duration + reliableMessagePageSize int } func NewTransportManager(bgCtx context.Context, conf *pldconf.TransportManagerConfig) components.TransportManager { return &transportManager{ - bgCtx: bgCtx, - conf: conf, - localNodeName: conf.NodeName, - transportsByID: make(map[uuid.UUID]*transport), - transportsByName: make(map[string]*transport), - components: make(map[prototk.PaladinMsg_Component]components.TransportClient), - peers: make(map[string]*peer), - senderBufferLen: confutil.IntMin(conf.SendQueueLen, 0, 
*pldconf.TransportManagerDefaults.SendQueueLen), - reliableMessageResend: confutil.DurationMin(conf.ReliableMessageResend, 100*time.Millisecond, *pldconf.TransportManagerDefaults.ReliableMessageResend), - sendShortRetry: retry.NewRetryLimited(&conf.SendRetry, &pldconf.TransportManagerDefaults.SendRetry), - reliableScanRetry: retry.NewRetryIndefinite(&conf.ReliableScanRetry, &pldconf.TransportManagerDefaults.ReliableScanRetry), - peerInactivityTimeout: confutil.DurationMin(conf.PeerInactivityTimeout, 0, *pldconf.TransportManagerDefaults.PeerInactivityTimeout), - quiesceTimeout: 1 * time.Second, // not currently tunable (considered very small edge case) + bgCtx: bgCtx, + conf: conf, + localNodeName: conf.NodeName, + transportsByID: make(map[uuid.UUID]*transport), + transportsByName: make(map[string]*transport), + components: make(map[prototk.PaladinMsg_Component]components.TransportClient), + peers: make(map[string]*peer), + senderBufferLen: confutil.IntMin(conf.SendQueueLen, 0, *pldconf.TransportManagerDefaults.SendQueueLen), + reliableMessageResend: confutil.DurationMin(conf.ReliableMessageResend, 100*time.Millisecond, *pldconf.TransportManagerDefaults.ReliableMessageResend), + sendShortRetry: retry.NewRetryLimited(&conf.SendRetry, &pldconf.TransportManagerDefaults.SendRetry), + reliableScanRetry: retry.NewRetryIndefinite(&conf.ReliableScanRetry, &pldconf.TransportManagerDefaults.ReliableScanRetry), + peerInactivityTimeout: confutil.DurationMin(conf.PeerInactivityTimeout, 0, *pldconf.TransportManagerDefaults.PeerInactivityTimeout), + quiesceTimeout: 1 * time.Second, // not currently tunable (considered very small edge case) + reliableMessagePageSize: 100, // not currently tunable } } diff --git a/core/go/internal/transportmgr/peer.go b/core/go/internal/transportmgr/peer.go index c3143c405..a6db9f412 100644 --- a/core/go/internal/transportmgr/peer.go +++ b/core/go/internal/transportmgr/peer.go @@ -213,22 +213,14 @@ func (p *peer) deactivate() { } } -func (p *peer) reliableMessageScan() error { +func (p *peer) reliableMessageScan(checkNew bool) error { - checkNew := true fullScan := p.lastDrainHWM == nil || time.Since(p.lastFullScan) >= p.tm.reliableMessageResend - select { - case <-p.persistedMsgsAvailable: - checkNew = true - default: - } - if !fullScan && !checkNew { return nil // Nothing to do } - const pageSize = 100 - + pageSize := p.tm.reliableMessagePageSize var total = 0 var lastPageEnd *uint64 for { @@ -397,12 +389,13 @@ func (p *peer) sender() { log.L(p.ctx).Infof("peer %s active", p.name) + checkNew := false hitInactivityTimeout := false for { // We send/resend any reliable messages queued up first err := p.tm.reliableScanRetry.Do(p.ctx, func(attempt int) (retryable bool, err error) { - return true, p.reliableMessageScan() + return true, p.reliableMessageScan(checkNew) }) if err != nil { return // context closed @@ -414,6 +407,7 @@ func (p *peer) sender() { return // quiesce handling is in senderDone() deferred function } hitInactivityTimeout = false + checkNew = false // Our wait timeout needs to be the shortest of: // - The full re-scan timeout for reliable messages @@ -430,6 +424,7 @@ func (p *peer) sender() { hitInactivityTimeout = true processingMsgs = false // spin round and check if we have persisted messages to (re)process case <-p.persistedMsgsAvailable: + checkNew = true processingMsgs = false // spin round and get the messages case <-p.ctx.Done(): return // we're done diff --git a/core/go/internal/transportmgr/peer_test.go b/core/go/internal/transportmgr/peer_test.go 
index 6d801b3b2..c3a800583 100644 --- a/core/go/internal/transportmgr/peer_test.go +++ b/core/go/internal/transportmgr/peer_test.go @@ -17,6 +17,7 @@ package transportmgr import ( "context" + "database/sql/driver" "encoding/json" "fmt" "sort" @@ -27,6 +28,7 @@ import ( "github.com/kaleido-io/paladin/config/pkg/confutil" "github.com/kaleido-io/paladin/config/pkg/pldconf" "github.com/kaleido-io/paladin/core/internal/components" + "github.com/kaleido-io/paladin/toolkit/pkg/log" "github.com/kaleido-io/paladin/toolkit/pkg/pldapi" "github.com/kaleido-io/paladin/toolkit/pkg/prototk" "github.com/kaleido-io/paladin/toolkit/pkg/retry" @@ -37,6 +39,13 @@ import ( "gorm.io/gorm" ) +func mockGetStateRetryThenOk(mc *mockComponents) components.TransportClient { + mc.stateManager.On("GetState", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, false, false). + Return(nil, fmt.Errorf("pop")).Once() + mockGetStateOk(mc) + return nil +} + func mockGetStateOk(mc *mockComponents) components.TransportClient { mGS := mc.stateManager.On("GetState", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, false, false) mGS.Run(func(args mock.Arguments) { @@ -56,16 +65,20 @@ func TestReliableMessageResendRealDB(t *testing.T) { ctx, tm, tp, done := newTestTransport(t, true, mockGoodTransport, - mockGetStateOk, + mockGetStateRetryThenOk, ) defer done() tm.sendShortRetry = retry.NewRetryLimited(&pldconf.RetryConfigWithMax{ MaxAttempts: confutil.P(1), }) + tm.reliableScanRetry = retry.NewRetryIndefinite(&pldconf.RetryConfig{ + MaxDelay: confutil.P("1ms"), + }) tm.quiesceTimeout = 10 * time.Millisecond tm.reliableMessageResend = 10 * time.Millisecond tm.peerInactivityTimeout = 1 * time.Second + tm.reliableMessagePageSize = 1 // forcing pagination mockActivateDeactivateOk(tp) @@ -137,6 +150,8 @@ func TestReliableMessageSendSendQuiesceRealDB(t *testing.T) { ) defer done() + log.SetLevel("debug") + tm.sendShortRetry = retry.NewRetryLimited(&pldconf.RetryConfigWithMax{ MaxAttempts: confutil.P(1), }) @@ -201,6 +216,12 @@ func TestSendBadReliableMessageMarkedFailRealDB(t *testing.T) { ctx, tm, tp, done := newTestTransport(t, true, mockGoodTransport, + func(mc *mockComponents) components.TransportClient { + // missing state + mc.stateManager.On("GetState", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, false, false). 
+ Return(nil, nil).Once() + return nil + }, ) defer done() @@ -213,15 +234,29 @@ func TestSendBadReliableMessageMarkedFailRealDB(t *testing.T) { mockActivateDeactivateOk(tp) + // First with missing metadata rm := &components.ReliableMessage{ MessageType: components.RMTState.Enum(), Node: "node2", - // Missing metadata } postCommit, err := tm.SendReliable(ctx, tm.persistence.DB(), rm) require.NoError(t, err) postCommit() + // Second with missing state + rm2 := &components.ReliableMessage{ + MessageType: components.RMTState.Enum(), + Node: "node2", + Metadata: tktypes.JSONString(&components.StateDistribution{ + Domain: "domain1", + ContractAddress: tktypes.RandAddress().String(), + StateID: tktypes.RandHex(32), + }), + } + postCommit, err = tm.SendReliable(ctx, tm.persistence.DB(), rm2) + require.NoError(t, err) + postCommit() + // Wait for nack var rmWithAck *components.ReliableMessage for (rmWithAck == nil || rmWithAck.Ack == nil) && !t.Failed() { @@ -232,6 +267,12 @@ func TestSendBadReliableMessageMarkedFailRealDB(t *testing.T) { require.NotNil(t, rmWithAck.Ack) require.Regexp(t, "PD012017", rmWithAck.Ack.Error) + // Second nack + rmWithAck, err = tm.getReliableMessageByID(ctx, tm.persistence.DB(), rm2.ID) + require.NoError(t, err) + require.NotNil(t, rmWithAck.Ack) + require.Regexp(t, "PD012014", rmWithAck.Ack.Error) + } func TestNameSortedPeers(t *testing.T) { @@ -321,3 +362,223 @@ func TestActivateBadPeerInfo(t *testing.T) { assert.Regexp(t, "PD012015", err) } + +func TestQuiesceDetectPersistentMessage(t *testing.T) { + + ctx, tm, tp, done := newTestTransport(t, false, mockGoodTransport) + defer done() + + // Load up a notification for a persistent message + tm.reliableMessageResend = 10 * time.Millisecond + tm.peerInactivityTimeout = 1 * time.Second + tm.quiesceTimeout = 1 * time.Second + + mockActivateDeactivateOk(tp) + + quiescingPeer, err := tm.getPeer(ctx, "node2") + require.NoError(t, err) + + // Force cancel that peer + quiescingPeer.cancelCtx() + <-quiescingPeer.done + + // Simulate quiescing with persistent messages delivered + quiescingPeer.quiescing = true + quiescingPeer.done = make(chan struct{}) + quiescingPeer.persistedMsgsAvailable = make(chan struct{}, 1) + quiescingPeer.persistedMsgsAvailable <- struct{}{} + + // Now in quiesce it will start up a new one + quiescingPeer.senderDone() + + require.NotNil(t, tm.peers["node2"]) + +} + +func TestQuiesceDetectFireAndForgetMessage(t *testing.T) { + + ctx, tm, tp, done := newTestTransport(t, false, + mockGoodTransport, + mockEmptyReliableMsgs, + mockEmptyReliableMsgs, + ) + defer done() + + // Load up a notification for a persistent message + tm.reliableMessageResend = 1 * time.Second + tm.peerInactivityTimeout = 1 * time.Second + tm.quiesceTimeout = 1 * time.Second + + mockActivateDeactivateOk(tp) + + quiescingPeer, err := tm.getPeer(ctx, "node2") + require.NoError(t, err) + + // Force cancel that peer + quiescingPeer.cancelCtx() + <-quiescingPeer.done + + // Simulate quiescing with persistent messages delivered + quiescingPeer.quiescing = true + quiescingPeer.ctx = ctx + quiescingPeer.done = make(chan struct{}) + quiescingPeer.sendQueue = make(chan *prototk.PaladinMsg, 1) + quiescingPeer.sendQueue <- &prototk.PaladinMsg{ + MessageId: uuid.NewString(), + Component: prototk.PaladinMsg_TRANSACTION_ENGINE, + MessageType: "example", + Payload: []byte(`{}`), + } + + sentMessages := make(chan *prototk.PaladinMsg, 1) + tp.Functions.SendMessage = func(ctx context.Context, req *prototk.SendMessageRequest) 
(*prototk.SendMessageResponse, error) { + sent := req.Message + assert.NotEmpty(t, sent.MessageId) + sentMessages <- sent + return nil, nil + } + // Now in quiesce it will start up a new one + quiescingPeer.senderDone() + + require.NotNil(t, tm.peers["node2"]) + + <-sentMessages + +} + +func TestDeactivateFail(t *testing.T) { + + ctx, tm, tp, done := newTestTransport(t, false, + mockGoodTransport, + mockEmptyReliableMsgs, + ) + defer done() + + tm.reliableMessageResend = 1 * time.Second + tm.peerInactivityTimeout = 1 * time.Second + tm.quiesceTimeout = 1 * time.Millisecond + + tp.Functions.ActivateNode = func(ctx context.Context, anr *prototk.ActivateNodeRequest) (*prototk.ActivateNodeResponse, error) { + return &prototk.ActivateNodeResponse{PeerInfoJson: `{"endpoint":"some.url"}`}, nil + } + tp.Functions.DeactivateNode = func(ctx context.Context, dnr *prototk.DeactivateNodeRequest) (*prototk.DeactivateNodeResponse, error) { + return nil, fmt.Errorf("pop") + } + + _, err := tm.getPeer(ctx, "node2") + require.NoError(t, err) + +} + +func TestGetReliableMessageByIDFail(t *testing.T) { + + ctx, tm, _, done := newTestTransport(t, false, func(mc *mockComponents) components.TransportClient { + mc.db.Mock.ExpectQuery("SELECT.*reliable_msgs").WillReturnError(fmt.Errorf("pop")) + return nil + }) + defer done() + + _, err := tm.getReliableMessageByID(ctx, tm.persistence.DB(), uuid.New()) + require.Regexp(t, "pop", err) + +} + +func TestGetReliableMessageScanNoAction(t *testing.T) { + + _, tm, _, done := newTestTransport(t, false) + defer done() + + tm.reliableMessageResend = 100 * time.Second + + p := &peer{ + tm: tm, + lastDrainHWM: confutil.P(uint64(100)), + lastFullScan: time.Now(), + } + + require.Nil(t, p.reliableMessageScan(false)) + +} + +func TestProcessReliableMsgPageIgnoreBeforeHWM(t *testing.T) { + + ctx, tm, _, done := newTestTransport(t, false) + defer done() + + p := &peer{ + ctx: ctx, + tm: tm, + lastDrainHWM: confutil.P(uint64(100)), + } + + err := p.processReliableMsgPage([]*components.ReliableMessage{ + { + ID: uuid.New(), + Sequence: 50, + Created: tktypes.TimestampNow(), + }, + }) + require.NoError(t, err) + +} + +func TestProcessReliableMsgPageIgnoreUnsupported(t *testing.T) { + + ctx, tm, _, done := newTestTransport(t, false, func(mc *mockComponents) components.TransportClient { + mc.db.Mock.ExpectExec("INSERT.*reliable_msg_acks").WillReturnError(fmt.Errorf("pop")) + return nil + }) + defer done() + + p := &peer{ + ctx: ctx, + tm: tm, + } + + err := p.processReliableMsgPage([]*components.ReliableMessage{ + { + ID: uuid.New(), + Sequence: 50, + Created: tktypes.TimestampNow(), + MessageType: components.RMTReceipt.Enum(), + }, + }) + require.Regexp(t, "pop", err) + +} + +func TestProcessReliableMsgPageInsertFail(t *testing.T) { + + ctx, tm, tp, done := newTestTransport(t, false, + mockGetStateOk, + func(mc *mockComponents) components.TransportClient { + mc.db.Mock.ExpectExec("INSERT.*reliable_msgs").WillReturnResult(driver.ResultNoRows) + return nil + }) + defer done() + + p := &peer{ + ctx: ctx, + tm: tm, + transport: tp.t, + } + + sd := &components.StateDistribution{ + Domain: "domain1", + ContractAddress: tktypes.RandAddress().String(), + StateID: tktypes.RandHex(32), + } + + rm := &components.ReliableMessage{ + ID: uuid.New(), + Sequence: 50, + MessageType: components.RMTState.Enum(), + Node: "node2", + Metadata: tktypes.JSONString(sd), + Created: tktypes.TimestampNow(), + } + + err := p.processReliableMsgPage([]*components.ReliableMessage{rm}) + require.Regexp(t, "PD020302", 
err) + +} diff --git a/core/go/internal/transportmgr/transport_test.go b/core/go/internal/transportmgr/transport_test.go index 9ca8850f6..8ed1fa720 100644 --- a/core/go/internal/transportmgr/transport_test.go +++ b/core/go/internal/transportmgr/transport_test.go @@ -270,6 +270,27 @@ func TestSendMessageDestNotAvailable(t *testing.T) { } +func TestGetTransportDetailsOk(t *testing.T) { + ctx, _, tp, done := newTestTransport(t, false, func(mc *mockComponents) components.TransportClient { + mc.registryManager.On("GetNodeTransports", mock.Anything, "node2").Return([]*components.RegistryNodeTransportEntry{ + { + Node: "node1", + Transport: "test1", + Details: `{"the":"stuff we need"}`, + }, + }, nil) + return nil + }) + defer done() + + tspt, err := tp.t.GetTransportDetails(ctx, &prototk.GetTransportDetailsRequest{ + Node: "node2", + }) + assert.NoError(t, err) + require.NotEmpty(t, tspt.TransportDetails) + +} + func TestSendMessageDestWrong(t *testing.T) { ctx, tm, _, done := newTestTransport(t, false) defer done() From 7eaa16289def67f18401c54d644bc6607ee7d05c Mon Sep 17 00:00:00 2001 From: Peter Broadhurst Date: Thu, 2 Jan 2025 10:43:38 -0500 Subject: [PATCH 15/41] Allow active peer without sender connected and add stats Signed-off-by: Peter Broadhurst --- core/go/internal/msgs/en_errors.go | 5 +- core/go/internal/transportmgr/manager.go | 6 +- core/go/internal/transportmgr/peer.go | 145 +++++++++++------- core/go/internal/transportmgr/peer_test.go | 49 +++--- .../internal/transportmgr/transport_test.go | 12 +- toolkit/go/pkg/pldapi/peerinfo.go | 37 +++++ 6 files changed, 165 insertions(+), 89 deletions(-) create mode 100644 toolkit/go/pkg/pldapi/peerinfo.go diff --git a/core/go/internal/msgs/en_errors.go b/core/go/internal/msgs/en_errors.go index cc09c4857..9b3dea51f 100644 --- a/core/go/internal/msgs/en_errors.go +++ b/core/go/internal/msgs/en_errors.go @@ -368,9 +368,8 @@ var ( MsgTransportClientRegisterAfterStartup = ffe("PD012012", "Client '%s' attempted registration after startup") MsgTransportUnsupportedReliableMsg = ffe("PD012013", "Unsupported reliable message type '%s'") MsgTransportStateNotAvailableLocally = ffe("PD012014", "State not available locally: domain=%s,contract=%s,id=%s") - MsgTransportInvalidPeerInfo = ffe("PD012015", "Invalid peer info JSON returned by plugin") - MsgTransportInvalidTargetNode = ffe("PD012016", "Invalid target node '%s'") - MsgTransportInvalidMessageData = ffe("PD012017", "Invalid data for message %s") + MsgTransportInvalidTargetNode = ffe("PD012015", "Invalid target node '%s'") + MsgTransportInvalidMessageData = ffe("PD012016", "Invalid data for message %s") // RegistryManager module PD0121XX MsgRegistryNodeEntiresNotFound = ffe("PD012100", "No entries found for node '%s'") diff --git a/core/go/internal/transportmgr/manager.go b/core/go/internal/transportmgr/manager.go index 5c5b8343f..16b21018b 100644 --- a/core/go/internal/transportmgr/manager.go +++ b/core/go/internal/transportmgr/manager.go @@ -253,7 +253,7 @@ func (tm *transportManager) Send(ctx context.Context, send *components.FireAndFo func (tm *transportManager) queueFireAndForget(ctx context.Context, nodeName string, msg *prototk.PaladinMsg) error { // Use or establish a p connection for the send - p, err := tm.getPeer(ctx, nodeName) + p, err := tm.getPeer(ctx, nodeName, true) if err == nil { err = p.transport.checkInit(ctx) } @@ -267,7 +267,7 @@ func (tm *transportManager) queueFireAndForget(ctx context.Context, nodeName str // use this "send" must be fault tolerant to message loss. 
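A minimal self-contained sketch of the select-based handoff that follows; generic stand-ins are used rather than the real `prototk.PaladinMsg`, and the real code returns an i18n `MsgContextCanceled` error where this sketch returns `ctx.Err()`:

```go
package peersketch

import "context"

// enqueue sketches the fire-and-forget handoff used by queueFireAndForget:
// block until the peer's buffered send channel accepts the message, or give
// up cleanly when the caller's context is cancelled. Loss after this point
// is acceptable by contract, so nothing is persisted here.
func enqueue[T any](ctx context.Context, sendQueue chan<- T, msg T) error {
	select {
	case sendQueue <- msg:
		return nil // buffered; the peer's sender goroutine will deliver it
	case <-ctx.Done():
		return ctx.Err()
	}
}
```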
 	select {
 	case p.sendQueue <- msg:
-		log.L(ctx).Debugf("queued %s message %s (cid=%v) to %s", msg.MessageType, msg.MessageId, tktypes.StrOrEmpty(msg.CorrelationId), p.name)
+		log.L(ctx).Debugf("queued %s message %s (cid=%v) to %s", msg.MessageType, msg.MessageId, tktypes.StrOrEmpty(msg.CorrelationId), p.Name)
 		return nil
 	case <-ctx.Done():
 		return i18n.NewError(ctx, msgs.MsgContextCanceled)
@@ -285,7 +285,7 @@ func (tm *transportManager) SendReliable(ctx context.Context, dbTX *gorm.DB, msg
 
 	_, err = msg.MessageType.Validate()
 	if err == nil {
-		p, err = tm.getPeer(ctx, msg.Node)
+		p, err = tm.getPeer(ctx, msg.Node, true)
 	}
 
 	if err == nil {
diff --git a/core/go/internal/transportmgr/peer.go b/core/go/internal/transportmgr/peer.go
index a6db9f412..7fbf0981f 100644
--- a/core/go/internal/transportmgr/peer.go
+++ b/core/go/internal/transportmgr/peer.go
@@ -20,12 +20,15 @@ import (
 	"context"
 	"encoding/json"
 	"sort"
+	"sync"
+	"sync/atomic"
 	"time"
 
 	"github.com/hyperledger/firefly-common/pkg/i18n"
 	"github.com/kaleido-io/paladin/core/internal/components"
 	"github.com/kaleido-io/paladin/core/internal/msgs"
 	"github.com/kaleido-io/paladin/toolkit/pkg/log"
+	"github.com/kaleido-io/paladin/toolkit/pkg/pldapi"
 	"github.com/kaleido-io/paladin/toolkit/pkg/prototk"
 	"github.com/kaleido-io/paladin/toolkit/pkg/tktypes"
 	"gorm.io/gorm/clause"
@@ -35,10 +38,11 @@ type peer struct {
 	ctx       context.Context
 	cancelCtx context.CancelFunc
 
-	name      string
 	tm        *transportManager
-	transport *transport     // the transport mutually supported by us and the remote node
-	peerInfo  map[string]any // opaque JSON object from the transport
+	transport *transport // the transport mutually supported by us and the remote node
+
+	pldapi.PeerInfo
+	statsLock sync.Mutex
 
 	persistedMsgsAvailable chan struct{}
 	sendQueue              chan *prototk.PaladinMsg
@@ -48,15 +52,16 @@ type peer struct {
 	lastDrainHWM          *uint64
 	persistentMsgsDrained bool
 
-	quiescing bool
-	done      chan struct{}
+	quiescing     bool
+	senderStarted atomic.Bool
+	senderDone    chan struct{}
 }
 
 type nameSortedPeers []*peer
 
 func (p nameSortedPeers) Len() int           { return len(p) }
 func (p nameSortedPeers) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
-func (p nameSortedPeers) Less(i, j int) bool { return cmp.Less(p[i].name, p[j].name) }
+func (p nameSortedPeers) Less(i, j int) bool { return cmp.Less(p[i].Name, p[j].Name) }
 
 // get a list of all active peers
 func (tm *transportManager) listActivePeers() nameSortedPeers {
@@ -77,7 +82,7 @@ func (tm *transportManager) getActivePeer(nodeName string) *peer {
 	return tm.peers[nodeName]
 }
 
-func (tm *transportManager) getPeer(ctx context.Context, nodeName string) (*peer, error) {
+func (tm *transportManager) getPeer(ctx context.Context, nodeName string, sending bool) (*peer, error) {
 
 	if err := tktypes.ValidateSafeCharsStartEndAlphaNum(ctx, nodeName, tktypes.DefaultNameMaxLen, "node"); err != nil {
 		return nil, i18n.WrapError(ctx, err, msgs.MsgTransportInvalidTargetNode, nodeName)
@@ -88,43 +93,62 @@
 
 	// Hopefully this is an already active connection
 	p := tm.getActivePeer(nodeName)
-	if p != nil {
+	if p != nil && (p.senderStarted.Load() || !sending) {
 		// Already active and obtained via read-lock
 		log.L(ctx).Debugf("connection already active for peer '%s'", nodeName)
 		return p, nil
 	}
 
-	return tm.connectPeer(ctx, nodeName)
+	return tm.connectPeer(ctx, nodeName, sending)
 }
 
-func (tm *transportManager) connectPeer(ctx context.Context, nodeName string) (*peer, error) {
+func (tm *transportManager) connectPeer(ctx context.Context, nodeName string, sending bool) (*peer, error) {
 	// Race to grab the write-lock and race to connect
 	tm.peersLock.Lock()
 	defer tm.peersLock.Unlock()
 	p := tm.peers[nodeName]
-	if p != nil {
+	if p != nil && (p.senderStarted.Load() || !sending) {
 		// There was a race to connect to this peer, and the other routine won
 		log.L(ctx).Debugf("connection already active for peer '%s' (after connection race)", nodeName)
 		return p, nil
 	}
 
-	// We need to resolve the node transport, and build a new connection
-	log.L(ctx).Debugf("attempting connection for peer '%s'", nodeName)
-	p = &peer{
-		tm:                     tm,
-		name:                   nodeName,
-		persistedMsgsAvailable: make(chan struct{}, 1),
-		sendQueue:              make(chan *prototk.PaladinMsg, tm.senderBufferLen),
-		done:                   make(chan struct{}),
+	if p == nil {
+		// We need to resolve the node transport, and build a new connection
+		log.L(ctx).Debugf("activating new peer '%s'", nodeName)
+		p = &peer{
+			tm: tm,
+			PeerInfo: pldapi.PeerInfo{
+				Name:      nodeName,
+				Activated: tktypes.TimestampNow(),
+			},
+			persistedMsgsAvailable: make(chan struct{}, 1),
+			sendQueue:              make(chan *prototk.PaladinMsg, tm.senderBufferLen),
+			senderDone:             make(chan struct{}),
+		}
+		p.ctx, p.cancelCtx = context.WithCancel(
+			log.WithLogField(tm.bgCtx /* go-routines need the bg context */, "peer", nodeName))
+	}
+	tm.peers[nodeName] = p
+
+	if sending {
+		if err := p.startSender(); err != nil {
+			// Note the peer is still in our list, but not connected for send.
+			// This means status can be reported for it.
+			p.OutboundError = err
+			return nil, err
+		}
 	}
-	p.ctx, p.cancelCtx = context.WithCancel(
-		log.WithLogField(tm.bgCtx /* go-routines need the bg context */, "peer", nodeName))
+	return p, nil
+}
+
+func (p *peer) startSender() error {
 
 	// Note the registry is responsible for caching to make this call as efficient as if
 	// we maintained the transport details in-memory ourselves.
-	registeredTransportDetails, err := tm.registryManager.GetNodeTransports(p.ctx, nodeName)
+	registeredTransportDetails, err := p.tm.registryManager.GetNodeTransports(p.ctx, p.Name)
 	if err != nil {
-		return nil, err
+		return err
 	}
 
 	// See if any of the transports registered by the node, are configured on this local node
 	// (we do not fall back to a secondary one currently)
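A compact sketch of the selection loop that continues below, under the stated no-fallback assumption; `entry`, `transport`, and `pickTransport` are simplified stand-ins for the `components.RegistryNodeTransportEntry` and `transport` types in this patch:

```go
package peersketch

type entry struct{ transportName, details string }
type transport struct{ name string }

// pickTransport sketches the resolution step in startSender: walk the node's
// registered transports and keep one that is also configured locally, with no
// retry against a secondary transport afterwards.
func pickTransport(local map[string]*transport, registered []entry) (*transport, string) {
	var chosen *transport
	var details string
	for _, rtd := range registered {
		if t := local[rtd.transportName]; t != nil {
			chosen, details = t, rtd.details
		}
	}
	return chosen, details // nil means no mutually supported transport
}
```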
var remoteTransportDetails string for _, rtd := range registeredTransportDetails { - p.transport = tm.transportsByName[rtd.Transport] + p.transport = p.tm.transportsByName[rtd.Transport] remoteTransportDetails = rtd.Details } if p.transport == nil { @@ -141,26 +165,28 @@ func (tm *transportManager) connectPeer(ctx context.Context, nodeName string) (* for _, rtd := range registeredTransportDetails { registeredTransportNames = append(registeredTransportNames, rtd.Transport) } - return nil, i18n.NewError(p.ctx, msgs.MsgTransportNoTransportsConfiguredForNode, nodeName, registeredTransportNames) + return i18n.NewError(p.ctx, msgs.MsgTransportNoTransportsConfiguredForNode, p.Name, registeredTransportNames) } // Activate the connection (the deactivate is deferred to the send loop) - res, err := p.transport.api.ActivateNode(ctx, &prototk.ActivateNodeRequest{ - NodeName: nodeName, + res, err := p.transport.api.ActivateNode(p.ctx, &prototk.ActivateNodeRequest{ + NodeName: p.Name, TransportDetails: remoteTransportDetails, }) if err != nil { - return nil, err + return err } - if err = json.Unmarshal([]byte(res.PeerInfoJson), &p.peerInfo); err != nil { - log.L(ctx).Errorf("Invalid peerInfo: %s", p.peerInfo) - return nil, i18n.NewError(ctx, msgs.MsgTransportInvalidPeerInfo) + if err = json.Unmarshal([]byte(res.PeerInfoJson), &p.Outbound); err != nil { + // We've already activated at this point, so we need to keep going - but this + // will mean there's no peer info, so we put it in as a string + log.L(p.ctx).Warnf("Invalid peerInfo: %s", res.PeerInfoJson) + p.Outbound = map[string]any{"info": string(res.PeerInfoJson)} } - log.L(ctx).Debugf("connected to peer '%s'", nodeName) - tm.peers[nodeName] = p + log.L(p.ctx).Debugf("connected to peer '%s'", p.Name) + p.senderStarted.Store(true) go p.sender() - return p, nil + return nil } func (p *peer) notifyPersistedMsgAvailable() { @@ -171,12 +197,21 @@ func (p *peer) notifyPersistedMsgAvailable() { } func (p *peer) send(msg *prototk.PaladinMsg) error { - return p.tm.sendShortRetry.Do(p.ctx, func(attempt int) (retryable bool, err error) { - return true, p.transport.send(p.ctx, p.name, msg) + err := p.tm.sendShortRetry.Do(p.ctx, func(attempt int) (retryable bool, err error) { + return true, p.transport.send(p.ctx, p.Name, msg) }) + if err == nil { + now := tktypes.TimestampNow() + p.statsLock.Lock() + defer p.statsLock.Unlock() + p.Stats.LastSend = &now + p.Stats.SentMsgs++ + p.Stats.SentBytes += uint64(len(msg.Payload)) + } + return err } -func (p *peer) senderDone() { +func (p *peer) senderCleanup() { p.deactivate() // There's a very small window where we might have got delivered a message by a routine @@ -186,30 +221,30 @@ func (p *peer) senderDone() { select { case msg := <-p.sendQueue: log.L(p.ctx).Infof("message delivered in inactivity quiesce window. Re-connecting") - _ = p.tm.queueFireAndForget(p.ctx, p.name, msg) + _ = p.tm.queueFireAndForget(p.ctx, p.Name, msg) case <-p.persistedMsgsAvailable: log.L(p.ctx).Infof("reliable message delivered in inactivity quiesce window. 
Re-connecting") - _, _ = p.tm.getPeer(p.ctx, p.name) + _, _ = p.tm.getPeer(p.ctx, p.Name, true) case <-time.After(p.tm.quiesceTimeout): p.quiescing = false } } - close(p.done) + close(p.senderDone) } func (p *peer) deactivate() { // Hold the peers write lock to do this p.tm.peersLock.Lock() defer p.tm.peersLock.Unlock() - delete(p.tm.peers, p.name) + delete(p.tm.peers, p.Name) // Holding the lock while activating/deactivating ensures we never dual-activate in the transport - log.L(p.ctx).Infof("peer %s deactivating", p.name) + log.L(p.ctx).Infof("peer %s deactivating", p.Name) if _, err := p.transport.api.DeactivateNode(p.ctx, &prototk.DeactivateNodeRequest{ - NodeName: p.name, + NodeName: p.Name, }); err != nil { - log.L(p.ctx).Warnf("peer %s returned deactivation error: %s", p.name, err) + log.L(p.ctx).Warnf("peer %s returned deactivation error: %s", p.Name, err) } } @@ -385,9 +420,9 @@ func (p *peer) processReliableMsgPage(page []*components.ReliableMessage) (err e } func (p *peer) sender() { - defer p.senderDone() + defer p.senderCleanup() - log.L(p.ctx).Infof("peer %s active", p.name) + log.L(p.ctx).Infof("peer %s active", p.Name) checkNew := false hitInactivityTimeout := false @@ -402,7 +437,8 @@ func (p *peer) sender() { } // Depending on our persistent message status, check if we're able to quiesce - if hitInactivityTimeout && p.persistentMsgsDrained { + if hitInactivityTimeout && p.persistentMsgsDrained && + (p.Stats.LastReceive == nil || time.Since(p.Stats.LastReceive.Time()) > p.tm.peerInactivityTimeout) { p.quiescing = true return // quiesce handling is in senderDone() deferred function } @@ -412,23 +448,22 @@ func (p *peer) sender() { // Our wait timeout needs to be the shortest of: // - The full re-scan timeout for reliable messages // - The inactivity timeout - inactivityTimeout := p.tm.reliableMessageResend - if inactivityTimeout > p.tm.peerInactivityTimeout { - inactivityTimeout = p.tm.peerInactivityTimeout - } - inactivityTimer := time.NewTimer(inactivityTimeout) + resendTimer := time.NewTimer(p.tm.reliableMessageResend) processingMsgs := true for processingMsgs { select { - case <-inactivityTimer.C: + case <-resendTimer.C: hitInactivityTimeout = true processingMsgs = false // spin round and check if we have persisted messages to (re)process case <-p.persistedMsgsAvailable: + resendTimer.Stop() checkNew = true processingMsgs = false // spin round and get the messages case <-p.ctx.Done(): + resendTimer.Stop() return // we're done case msg := <-p.sendQueue: + resendTimer.Stop() // send and spin straight round if err := p.send(msg); err != nil { log.L(p.ctx).Errorf("failed to send message '%s' after short retry (discarding): %s", msg.MessageId, err) @@ -440,5 +475,7 @@ func (p *peer) sender() { func (p *peer) close() { p.cancelCtx() - <-p.done + if p.senderStarted.Load() { + <-p.senderDone + } } diff --git a/core/go/internal/transportmgr/peer_test.go b/core/go/internal/transportmgr/peer_test.go index c3a800583..ad63fd457 100644 --- a/core/go/internal/transportmgr/peer_test.go +++ b/core/go/internal/transportmgr/peer_test.go @@ -208,7 +208,7 @@ func TestReliableMessageSendSendQuiesceRealDB(t *testing.T) { } // Wait for the peer to end via quiesce - <-p.done + <-p.senderDone } @@ -265,7 +265,7 @@ func TestSendBadReliableMessageMarkedFailRealDB(t *testing.T) { require.NoError(t, err) } require.NotNil(t, rmWithAck.Ack) - require.Regexp(t, "PD012017", rmWithAck.Ack.Error) + require.Regexp(t, "PD012016", rmWithAck.Ack.Error) // Second nack rmWithAck, err = 
tm.getReliableMessageByID(ctx, tm.persistence.DB(), rm2.ID) @@ -278,19 +278,19 @@ func TestSendBadReliableMessageMarkedFailRealDB(t *testing.T) { func TestNameSortedPeers(t *testing.T) { peerList := nameSortedPeers{ - {name: "ccc"}, - {name: "aaa"}, - {name: "ddd"}, - {name: "bbb"}, + {PeerInfo: pldapi.PeerInfo{Name: "ccc"}}, + {PeerInfo: pldapi.PeerInfo{Name: "aaa"}}, + {PeerInfo: pldapi.PeerInfo{Name: "ddd"}}, + {PeerInfo: pldapi.PeerInfo{Name: "bbb"}}, } sort.Sort(peerList) require.Equal(t, nameSortedPeers{ - {name: "aaa"}, - {name: "bbb"}, - {name: "ccc"}, - {name: "ddd"}, + {PeerInfo: pldapi.PeerInfo{Name: "aaa"}}, + {PeerInfo: pldapi.PeerInfo{Name: "bbb"}}, + {PeerInfo: pldapi.PeerInfo{Name: "ccc"}}, + {PeerInfo: pldapi.PeerInfo{Name: "ddd"}}, }, peerList) } @@ -322,7 +322,7 @@ func TestConnectionRace(t *testing.T) { connDone := make(chan bool) for i := 0; i < 2; i++ { go func() { - _, err := tm.connectPeer(ctx, "node2") + _, err := tm.connectPeer(ctx, "node2", true) require.NoError(t, err) connDone <- true }() @@ -344,7 +344,7 @@ func TestActivateFail(t *testing.T) { return nil, fmt.Errorf("pop") } - _, err := tm.getPeer(ctx, "node2") + _, err := tm.getPeer(ctx, "node2", true) assert.Regexp(t, "pop", err) } @@ -355,11 +355,12 @@ func TestActivateBadPeerInfo(t *testing.T) { defer done() tp.Functions.ActivateNode = func(ctx context.Context, anr *prototk.ActivateNodeRequest) (*prototk.ActivateNodeResponse, error) { - return &prototk.ActivateNodeResponse{PeerInfoJson: ""}, nil + return &prototk.ActivateNodeResponse{PeerInfoJson: "!{ not valid JSON"}, nil } - _, err := tm.getPeer(ctx, "node2") - assert.Regexp(t, "PD012015", err) + p, err := tm.getPeer(ctx, "node2", true) + assert.NoError(t, err) + assert.Regexp(t, "!{ not valid JSON", p.Outbound["info"]) } @@ -375,21 +376,21 @@ func TestQuiesceDetectPersistentMessage(t *testing.T) { mockActivateDeactivateOk(tp) - quiescingPeer, err := tm.getPeer(ctx, "node2") + quiescingPeer, err := tm.getPeer(ctx, "node2", true) require.NoError(t, err) // Force cancel that peer quiescingPeer.cancelCtx() - <-quiescingPeer.done + <-quiescingPeer.senderDone // Simulate quiescing with persistent messages delivered quiescingPeer.quiescing = true - quiescingPeer.done = make(chan struct{}) + quiescingPeer.senderDone = make(chan struct{}) quiescingPeer.persistedMsgsAvailable = make(chan struct{}, 1) quiescingPeer.persistedMsgsAvailable <- struct{}{} // Now in quiesce it will start up a new one - quiescingPeer.senderDone() + quiescingPeer.senderCleanup() require.NotNil(t, tm.peers["node2"]) @@ -411,17 +412,17 @@ func TestQuiesceDetectFireAndForgetMessage(t *testing.T) { mockActivateDeactivateOk(tp) - quiescingPeer, err := tm.getPeer(ctx, "node2") + quiescingPeer, err := tm.getPeer(ctx, "node2", true) require.NoError(t, err) // Force cancel that peer quiescingPeer.cancelCtx() - <-quiescingPeer.done + <-quiescingPeer.senderDone // Simulate quiescing with persistent messages delivered quiescingPeer.quiescing = true quiescingPeer.ctx = ctx - quiescingPeer.done = make(chan struct{}) + quiescingPeer.senderDone = make(chan struct{}) quiescingPeer.sendQueue = make(chan *prototk.PaladinMsg, 1) quiescingPeer.sendQueue <- &prototk.PaladinMsg{ MessageId: uuid.NewString(), @@ -438,7 +439,7 @@ func TestQuiesceDetectFireAndForgetMessage(t *testing.T) { return nil, nil } // Now in quiesce it will start up a new one - quiescingPeer.senderDone() + quiescingPeer.senderCleanup() require.NotNil(t, tm.peers["node2"]) @@ -465,7 +466,7 @@ func TestDeactivateFail(t *testing.T) { return 
nil, fmt.Errorf("pop") } - _, err := tm.getPeer(ctx, "node2") + _, err := tm.getPeer(ctx, "node2", true) require.NoError(t, err) } diff --git a/core/go/internal/transportmgr/transport_test.go b/core/go/internal/transportmgr/transport_test.go index 8ed1fa720..a27be54e2 100644 --- a/core/go/internal/transportmgr/transport_test.go +++ b/core/go/internal/transportmgr/transport_test.go @@ -300,7 +300,7 @@ func TestSendMessageDestWrong(t *testing.T) { message.Component = prototk.PaladinMsg_TRANSACTION_ENGINE message.Node = "" err := tm.Send(ctx, message) - assert.Regexp(t, "PD012016", err) + assert.Regexp(t, "PD012015", err) message.Component = prototk.PaladinMsg_TRANSACTION_ENGINE message.Node = "node1" @@ -467,12 +467,14 @@ func TestSendContextClosed(t *testing.T) { ctx, tm, tp, done := newTestTransport(t, false) done() + p := &peer{ + transport: tp.t, + sendQueue: make(chan *prototk.PaladinMsg), + } tm.peers = map[string]*peer{ - "node2": { - transport: tp.t, - sendQueue: make(chan *prototk.PaladinMsg), - }, + "node2": p, } + p.senderStarted.Store(true) err := tm.Send(ctx, testMessage()) assert.Regexp(t, "PD010301", err) diff --git a/toolkit/go/pkg/pldapi/peerinfo.go b/toolkit/go/pkg/pldapi/peerinfo.go new file mode 100644 index 000000000..d170ae823 --- /dev/null +++ b/toolkit/go/pkg/pldapi/peerinfo.go @@ -0,0 +1,37 @@ +// Copyright © 2024 Kaleido, Inc. +// +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+package pldapi
+
+import "github.com/kaleido-io/paladin/toolkit/pkg/tktypes"
+
+type PeerInfo struct {
+	Name          string            `docstruct:"PeerInfo" json:"name"`
+	Stats         PeerStats         `docstruct:"PeerInfo" json:"stats"`
+	Activated     tktypes.Timestamp `docstruct:"PeerInfo" json:"activated"`
+	Outbound      map[string]any    `docstruct:"PeerInfo" json:"outbound,omitempty"`
+	OutboundError error             `docstruct:"PeerInfo" json:"outboundError,omitempty"`
+}
+
+type PeerStats struct {
+	QueuedMsgs    uint64             `docstruct:"PeerStats" json:"queuedMsgs,omitempty"`
+	SentMsgs      uint64             `docstruct:"PeerStats" json:"sentMsgs,omitempty"`
+	ReceivedMsgs  uint64             `docstruct:"PeerStats" json:"receivedMsgs,omitempty"`
+	SentBytes     uint64             `docstruct:"PeerStats" json:"sentBytes,omitempty"`
+	ReceivedBytes uint64             `docstruct:"PeerStats" json:"receivedBytes,omitempty"`
+	LastSend      *tktypes.Timestamp `docstruct:"PeerStats" json:"lastSend,omitempty"`
+	LastReceive   *tktypes.Timestamp `docstruct:"PeerStats" json:"lastReceive,omitempty"`
+}

From 253859c27d878d64102074a781bd0706de5675ba Mon Sep 17 00:00:00 2001
From: Peter Broadhurst
Date: Thu, 2 Jan 2025 15:43:11 -0500
Subject: [PATCH 16/41] Initial work on receive message processing with acks

Signed-off-by: Peter Broadhurst
---
 config/pkg/pldconf/transportmgr.go            |   6 +
 core/go/internal/components/transportmgr.go   |   4 -
 .../identityresolver/identityresolver.go      |   2 +-
 core/go/internal/msgs/en_errors.go            |   3 +
 .../prepared_transaction_distributer.go       |   6 -
 .../internal/privatetxnmgr/private_txn_mgr.go |   6 +-
 core/go/internal/transportmgr/manager.go      |  50 ++---
 core/go/internal/transportmgr/manager_test.go |  58 ++----
 core/go/internal/transportmgr/msghandler.go   | 172 ++++++++++++++++++
 core/go/internal/transportmgr/peer.go         |  52 ++++--
 core/go/internal/transportmgr/peer_test.go    |  29 ++-
 core/go/internal/transportmgr/transport.go    |  33 ++--
 .../internal/transportmgr/transport_test.go   |  66 ++-----
 toolkit/go/pkg/pldapi/peerinfo.go             |  15 +-
 toolkit/proto/protos/to_transport.proto       |   1 +
 15 files changed, 314 insertions(+), 189 deletions(-)
 create mode 100644 core/go/internal/transportmgr/msghandler.go

diff --git a/config/pkg/pldconf/transportmgr.go b/config/pkg/pldconf/transportmgr.go
index bf8b6c8d3..3f9e17cd7 100644
--- a/config/pkg/pldconf/transportmgr.go
+++ b/config/pkg/pldconf/transportmgr.go
@@ -23,6 +23,7 @@ type TransportManagerConfig struct {
 	SendRetry             RetryConfigWithMax          `json:"sendRetry"`
 	ReliableScanRetry     RetryConfig                 `json:"reliableScanRetry"`
 	ReliableMessageResend *string                     `json:"reliableMessageResend"`
+	ReliableMessageWriter FlushWriterConfig           `json:"reliableMessageWriter"`
 	Transports            map[string]*TransportConfig `json:"transports"`
 }
 
@@ -44,6 +45,11 @@ var TransportManagerDefaults = &TransportManagerConfig{
 		},
 		MaxAttempts: confutil.P(3),
 	},
+	ReliableMessageWriter: FlushWriterConfig{
+		WorkerCount:  confutil.P(1),
+		BatchTimeout: confutil.P("250ms"),
+		BatchMaxSize: confutil.P(50),
+	},
 }
 
 type TransportConfig struct {
diff --git a/core/go/internal/components/transportmgr.go b/core/go/internal/components/transportmgr.go
index 23e767759..5d847ceb7 100644
--- a/core/go/internal/components/transportmgr.go
+++ b/core/go/internal/components/transportmgr.go
@@ -146,8 +146,4 @@ type TransportManager interface {
 	//
 	// The pre-commit handler must be called after the DB transaction commits to trigger the delivery.
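A hedged usage sketch of the contract just described (the method signature follows below), matching the pattern the tests in this series use — `SendReliable` inside the transaction, the returned hook only after commit. `db`, `transportMgr`, and `metadataJSON` are illustrative names, not taken from the patch:

```go
// Queue a reliable state-distribution message in the caller's transaction;
// fire the returned hook only once the transaction has committed, so the
// peer's sender is nudged no earlier than the data is durable.
var postCommit func()
err := db.Transaction(func(dbTX *gorm.DB) error {
	var err error
	postCommit, err = transportMgr.SendReliable(ctx, dbTX, &components.ReliableMessage{
		MessageType: components.RMTState.Enum(),
		Node:        "node2",
		Metadata:    metadataJSON, // e.g. a serialized components.StateDistribution
	})
	return err
})
if err == nil {
	postCommit() // only after the DB transaction has committed
}
```

Note the interface names the hook `preCommit`, but its documented semantics (and the tests' `postCommit` naming) are post-commit delivery triggering.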
SendReliable(ctx context.Context, dbTX *gorm.DB, msg *ReliableMessage) (preCommit func(), err error) - - // RegisterClient registers a client to receive messages from the transport manager - // messages are routed to the client based on the Destination field of the message matching the value returned from Destination() function of the TransportClient - RegisterClient(ctx context.Context, client TransportClient) error } diff --git a/core/go/internal/identityresolver/identityresolver.go b/core/go/internal/identityresolver/identityresolver.go index 68a6431f0..14b440ed7 100644 --- a/core/go/internal/identityresolver/identityresolver.go +++ b/core/go/internal/identityresolver/identityresolver.go @@ -70,7 +70,7 @@ func (ir *identityResolver) PostInit(c components.AllComponents) error { ir.nodeName = c.TransportManager().LocalNodeName() ir.keyManager = c.KeyManager() ir.transportManager = c.TransportManager() - return c.TransportManager().RegisterClient(ir.bgCtx, ir) + return nil } func (ir *identityResolver) Start() error { diff --git a/core/go/internal/msgs/en_errors.go b/core/go/internal/msgs/en_errors.go index 9b3dea51f..b77a48830 100644 --- a/core/go/internal/msgs/en_errors.go +++ b/core/go/internal/msgs/en_errors.go @@ -370,6 +370,9 @@ var ( MsgTransportStateNotAvailableLocally = ffe("PD012014", "State not available locally: domain=%s,contract=%s,id=%s") MsgTransportInvalidTargetNode = ffe("PD012015", "Invalid target node '%s'") MsgTransportInvalidMessageData = ffe("PD012016", "Invalid data for message %s") + MsgTransportUnsupportedReliableMsgType = ffe("PD012017", "Unsupported reliable message type '%s'") + MsgTransportAckMissingCorrelationID = ffe("PD012018", "Ack/nack missing correlation ID") + MsgTransportNackMissingError = ffe("PD012019", "Nack missing error information") // RegistryManager module PD0121XX MsgRegistryNodeEntiresNotFound = ffe("PD012100", "No entries found for node '%s'") diff --git a/core/go/internal/preparedtxdistribution/prepared_transaction_distributer.go b/core/go/internal/preparedtxdistribution/prepared_transaction_distributer.go index bbb6f7f6b..c33d8c0a9 100644 --- a/core/go/internal/preparedtxdistribution/prepared_transaction_distributer.go +++ b/core/go/internal/preparedtxdistribution/prepared_transaction_distributer.go @@ -103,12 +103,6 @@ func (sd *preparedTransactionDistributer) Start(bgCtx context.Context) error { ctx := sd.runCtx log.L(ctx).Info("preparedTransactionDistributer:Start") - err := sd.transportManager.RegisterClient(ctx, sd) - if err != nil { - log.L(ctx).Errorf("Error registering transport client: %s", err) - return err - } - sd.acknowledgementWriter.Start() sd.receivedPreparedTransactionWriter.Start() diff --git a/core/go/internal/privatetxnmgr/private_txn_mgr.go b/core/go/internal/privatetxnmgr/private_txn_mgr.go index 48ffdb0b8..872f3426e 100644 --- a/core/go/internal/privatetxnmgr/private_txn_mgr.go +++ b/core/go/internal/privatetxnmgr/private_txn_mgr.go @@ -87,11 +87,7 @@ func (p *privateTxManager) PostInit(c components.AllComponents) error { p.components.Persistence(), &p.config.PreparedTransactionDistributer) - err := p.preparedTransactionDistributer.Start(p.ctx) - if err != nil { - return err - } - return p.components.TransportManager().RegisterClient(p.ctx, p) + return p.preparedTransactionDistributer.Start(p.ctx) } func (p *privateTxManager) Start() error { diff --git a/core/go/internal/transportmgr/manager.go b/core/go/internal/transportmgr/manager.go index 16b21018b..4956aa7af 100644 --- a/core/go/internal/transportmgr/manager.go 
+++ b/core/go/internal/transportmgr/manager.go @@ -25,6 +25,7 @@ import ( "github.com/kaleido-io/paladin/config/pkg/confutil" "github.com/kaleido-io/paladin/config/pkg/pldconf" "github.com/kaleido-io/paladin/core/internal/components" + "github.com/kaleido-io/paladin/core/internal/flushwriter" "github.com/kaleido-io/paladin/core/internal/msgs" "github.com/kaleido-io/paladin/core/pkg/persistence" "gorm.io/gorm" @@ -42,23 +43,24 @@ type transportManager struct { bgCtx context.Context mux sync.Mutex - rpcModule *rpcserver.RPCModule - conf *pldconf.TransportManagerConfig - localNodeName string - registryManager components.RegistryManager - stateManager components.StateManager - persistence persistence.Persistence + rpcModule *rpcserver.RPCModule + conf *pldconf.TransportManagerConfig + localNodeName string + registryManager components.RegistryManager + stateManager components.StateManager + domainManager components.DomainManager + privateTxManager components.PrivateTxManager + identityResolver components.IdentityResolver + persistence persistence.Persistence transportsByID map[uuid.UUID]*transport transportsByName map[string]*transport - components map[prototk.PaladinMsg_Component]components.TransportClient - destinationsFixed bool - destinationsMux sync.RWMutex - peersLock sync.RWMutex peers map[string]*peer + reliableMsgWriter flushwriter.Writer[*reliableMsgOp, *noResult] + sendShortRetry *retry.Retry reliableScanRetry *retry.Retry peerInactivityTimeout time.Duration @@ -70,13 +72,12 @@ type transportManager struct { } func NewTransportManager(bgCtx context.Context, conf *pldconf.TransportManagerConfig) components.TransportManager { - return &transportManager{ + tm := &transportManager{ bgCtx: bgCtx, conf: conf, localNodeName: conf.NodeName, transportsByID: make(map[uuid.UUID]*transport), transportsByName: make(map[string]*transport), - components: make(map[prototk.PaladinMsg_Component]components.TransportClient), peers: make(map[string]*peer), senderBufferLen: confutil.IntMin(conf.SendQueueLen, 0, *pldconf.TransportManagerDefaults.SendQueueLen), reliableMessageResend: confutil.DurationMin(conf.ReliableMessageResend, 100*time.Millisecond, *pldconf.TransportManagerDefaults.ReliableMessageResend), @@ -86,6 +87,9 @@ func NewTransportManager(bgCtx context.Context, conf *pldconf.TransportManagerCo quiesceTimeout: 1 * time.Second, // not currently tunable (considered very small edge case) reliableMessagePageSize: 100, // not currently tunable } + tm.reliableMsgWriter = flushwriter.NewWriter(bgCtx, tm.handleReliableMsgBatch, tm.persistence, + &conf.ReliableMessageWriter, &pldconf.TransportManagerDefaults.ReliableMessageWriter) + return tm } func (tm *transportManager) PreInit(pic components.PreInitComponents) (*components.ManagerInitResult, error) { @@ -104,15 +108,14 @@ func (tm *transportManager) PostInit(c components.AllComponents) error { // that could have cached a nil value in memory. 
tm.registryManager = c.RegistryManager() tm.stateManager = c.StateManager() + tm.domainManager = c.DomainManager() + tm.privateTxManager = c.PrivateTxManager() + tm.identityResolver = c.IdentityResolver() tm.persistence = c.Persistence() return nil } func (tm *transportManager) Start() error { - tm.destinationsMux.Lock() - defer tm.destinationsMux.Unlock() - // All destinations must be registered as part of the startup sequence - tm.destinationsFixed = true return nil } @@ -135,21 +138,6 @@ func (tm *transportManager) Stop() { } -func (tm *transportManager) RegisterClient(ctx context.Context, client components.TransportClient) error { - tm.destinationsMux.Lock() - defer tm.destinationsMux.Unlock() - if tm.destinationsFixed { - return i18n.NewError(tm.bgCtx, msgs.MsgTransportClientRegisterAfterStartup, client.Destination()) - } - if _, found := tm.components[client.Destination()]; found { - log.L(ctx).Errorf("Client already registered for destination %s", client.Destination()) - return i18n.NewError(tm.bgCtx, msgs.MsgTransportClientAlreadyRegistered, client.Destination()) - } - tm.components[client.Destination()] = client - return nil - -} - func (tm *transportManager) cleanupTransport(t *transport) { // must not hold the transport lock when running this t.close() diff --git a/core/go/internal/transportmgr/manager_test.go b/core/go/internal/transportmgr/manager_test.go index 3e447d8c4..59e1995fe 100644 --- a/core/go/internal/transportmgr/manager_test.go +++ b/core/go/internal/transportmgr/manager_test.go @@ -22,7 +22,6 @@ import ( "github.com/google/uuid" "github.com/kaleido-io/paladin/config/pkg/pldconf" - "github.com/kaleido-io/paladin/core/internal/components" "github.com/kaleido-io/paladin/core/mocks/componentmocks" "github.com/kaleido-io/paladin/core/pkg/persistence" "github.com/kaleido-io/paladin/core/pkg/persistence/mockpersistence" @@ -36,17 +35,23 @@ import ( ) type mockComponents struct { - c *componentmocks.AllComponents - db *mockpersistence.SQLMockProvider - p persistence.Persistence - registryManager *componentmocks.RegistryManager - stateManager *componentmocks.StateManager + c *componentmocks.AllComponents + db *mockpersistence.SQLMockProvider + p persistence.Persistence + registryManager *componentmocks.RegistryManager + stateManager *componentmocks.StateManager + domainManager *componentmocks.DomainManager + privateTxManager *componentmocks.PrivateTxManager + identityResolver *componentmocks.IdentityResolver } func newMockComponents(t *testing.T, realDB bool) *mockComponents { mc := &mockComponents{c: componentmocks.NewAllComponents(t)} mc.registryManager = componentmocks.NewRegistryManager(t) mc.stateManager = componentmocks.NewStateManager(t) + mc.domainManager = componentmocks.NewDomainManager(t) + mc.privateTxManager = componentmocks.NewPrivateTxManager(t) + mc.identityResolver = componentmocks.NewIdentityResolver(t) if realDB { p, cleanup, err := persistence.NewUnitTestPersistence(context.Background(), "transportmgr") require.NoError(t, err) @@ -61,21 +66,20 @@ func newMockComponents(t *testing.T, realDB bool) *mockComponents { mc.c.On("Persistence").Return(mc.p).Maybe() mc.c.On("RegistryManager").Return(mc.registryManager).Maybe() mc.c.On("StateManager").Return(mc.stateManager).Maybe() + mc.c.On("DomainManager").Return(mc.domainManager).Maybe() + mc.c.On("PrivateTxManager").Return(mc.privateTxManager).Maybe() + mc.c.On("IdentityResolver").Return(mc.identityResolver).Maybe() return mc } -func newTestTransportManager(t *testing.T, realDB bool, conf 
*pldconf.TransportManagerConfig, extraSetup ...func(mc *mockComponents) components.TransportClient) (context.Context, *transportManager, *mockComponents, func()) { +func newTestTransportManager(t *testing.T, realDB bool, conf *pldconf.TransportManagerConfig, extraSetup ...func(mc *mockComponents)) (context.Context, *transportManager, *mockComponents, func()) { ctx, cancelCtx := context.WithCancel(context.Background()) oldLevel := logrus.GetLevel() logrus.SetLevel(logrus.TraceLevel) mc := newMockComponents(t, realDB) - var clients []components.TransportClient for _, fn := range extraSetup { - client := fn(mc) - if client != nil { - clients = append(clients, client) - } + fn(mc) } tm := NewTransportManager(ctx, conf) @@ -84,12 +88,6 @@ func newTestTransportManager(t *testing.T, realDB bool, conf *pldconf.TransportM require.NoError(t, err) assert.NotNil(t, ir) - // registration happens during init - for _, c := range clients { - err := tm.RegisterClient(ctx, c) - require.NoError(t, err) - } - err = tm.PostInit(mc.c) require.NoError(t, err) @@ -168,30 +166,6 @@ func TestConfigureTransportFail(t *testing.T) { assert.Regexp(t, "pop", *tp.t.initError.Load()) } -func TestDoubleRegisterClient(t *testing.T) { - tm := NewTransportManager(context.Background(), &pldconf.TransportManagerConfig{}) - - receivingClient := componentmocks.NewTransportClient(t) - receivingClient.On("Destination").Return(prototk.PaladinMsg_TRANSACTION_ENGINE) - - err := tm.RegisterClient(context.Background(), receivingClient) - require.NoError(t, err) - - err = tm.RegisterClient(context.Background(), receivingClient) - assert.Regexp(t, "PD012010", err) -} - -func TestDoubleRegisterAfterStart(t *testing.T) { - tm := NewTransportManager(context.Background(), &pldconf.TransportManagerConfig{}) - tm.(*transportManager).destinationsFixed = true - - receivingClient := componentmocks.NewTransportClient(t) - receivingClient.On("Destination").Return(prototk.PaladinMsg_TRANSACTION_ENGINE) - - err := tm.RegisterClient(context.Background(), receivingClient) - assert.Regexp(t, "PD012012", err) -} - func TestGetLocalTransportDetailsNotFound(t *testing.T) { tm := NewTransportManager(context.Background(), &pldconf.TransportManagerConfig{}).(*transportManager) diff --git a/core/go/internal/transportmgr/msghandler.go b/core/go/internal/transportmgr/msghandler.go new file mode 100644 index 000000000..004b6a196 --- /dev/null +++ b/core/go/internal/transportmgr/msghandler.go @@ -0,0 +1,172 @@ +/* + * Copyright © 2024 Kaleido, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package transportmgr
+
+import (
+	"context"
+	"encoding/json"
+
+	"github.com/google/uuid"
+	"github.com/hyperledger/firefly-common/pkg/i18n"
+	"github.com/kaleido-io/paladin/core/internal/components"
+	"github.com/kaleido-io/paladin/core/internal/flushwriter"
+	"github.com/kaleido-io/paladin/core/internal/msgs"
+	"github.com/kaleido-io/paladin/toolkit/pkg/log"
+	"github.com/kaleido-io/paladin/toolkit/pkg/prototk"
+	"github.com/kaleido-io/paladin/toolkit/pkg/tktypes"
+	"gorm.io/gorm"
+)
+
+const (
+	RMHMessageTypeAck               = "ack"
+	RMHMessageTypeNack              = "nack"
+	RMHMessageTypeStateDistribution = string(components.RMTState)
+	RMHMessageTypeStateReceipt      = string(components.RMTReceipt)
+)
+
+type reliableMsgOp struct {
+	msgID uuid.UUID
+	p     *peer
+	msg   *prototk.PaladinMsg
+}
+
+func (op *reliableMsgOp) WriteKey() string {
+	return op.p.Name
+}
+
+type noResult struct{}
+
+type ackInfo struct {
+	node  string
+	id    uuid.UUID // sent in CID on wire
+	Error error     `json:"error"`
+}
+
+// p, err := tm.getPeer(ctx, v.node, false)
+// if err != nil {
+//	log.L(ctx).Errorf("Discarding message from invalid peer '%s': %s", v.node, err)
+//	continue
+// }
+// p.updateReceivedStats(v.msg)
+
+func (tm *transportManager) handleReliableMsgBatch(ctx context.Context, dbTX *gorm.DB, values []*reliableMsgOp) (func(error), []flushwriter.Result[*noResult], error) {
+
+	var acksToWrite []*components.ReliableMessageAck
+	var acksToSend []*ackInfo
+	var statesToAdd []*components.StateUpsertOutsideContext
+
+	for _, v := range values {
+
+		switch v.msg.MessageType {
+		case RMHMessageTypeStateDistribution:
+			_, stateToAdd, err := parseStateDistribution(ctx, v.msgID, v.msg.Payload)
+			if err != nil {
+				acksToSend = append(acksToSend,
+					&ackInfo{node: v.p.Name, id: v.msgID, Error: err}, // reject the message permanently
+				)
+			} else {
+				statesToAdd = append(statesToAdd, stateToAdd)
+				acksToSend = append(acksToSend, &ackInfo{node: v.p.Name, id: v.msgID})
+			}
+		case RMHMessageTypeAck, RMHMessageTypeNack:
+			ackNackToWrite := tm.parseReceivedAckNack(ctx, v.msg)
+			if ackNackToWrite != nil {
+				acksToWrite = append(acksToWrite, ackNackToWrite)
+			}
+		default:
+			err := i18n.NewError(ctx, msgs.MsgTransportUnsupportedReliableMsgType, v.msg.MessageType)
+			acksToSend = append(acksToSend,
+				&ackInfo{node: v.p.Name, id: v.msgID, Error: err}, // reject the message permanently
+			)
+		}
+	}
+
+	if len(acksToWrite) > 0 {
+		if err := tm.writeAcks(ctx, dbTX, acksToWrite...); err != nil {
+			return nil, nil, err
+		}
+	}
+
+	return func(err error) {
+		if err == nil {
+			// We've committed the database work ok - send the acks/nacks to the other side
+			for _, a := range acksToSend {
+				cid := a.id.String()
+				msgType := RMHMessageTypeAck
+				if a.Error != nil {
+					msgType = RMHMessageTypeNack
+				}
+				_ = tm.queueFireAndForget(ctx, a.node, &prototk.PaladinMsg{
+					MessageId:     uuid.NewString(),
+					Component:     prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER,
+					MessageType:   msgType,
+					CorrelationId: &cid,
+					Payload:       tktypes.JSONString(a),
+				})
+			}
+		}
+	}, make([]flushwriter.Result[*noResult], len(values)), nil
+
+}
+
+func (tm *transportManager) parseReceivedAckNack(ctx context.Context, msg *prototk.PaladinMsg) *components.ReliableMessageAck {
+	var info ackInfo
+	var cid uuid.UUID
+	err := json.Unmarshal(msg.Payload, &info)
+	if msg.CorrelationId == nil {
+		err = i18n.NewError(ctx, msgs.MsgTransportAckMissingCorrelationID)
+	}
+	if err == nil {
+		cid, err = uuid.Parse(*msg.CorrelationId)
+	}
+	if err != nil {
+		log.L(ctx).Errorf("Received invalid ack/nack: %s", msg.Payload)
+		return nil
+	}
+	ackNackToWrite := &components.ReliableMessageAck{
+		MessageID: cid,
+		Time:      tktypes.TimestampNow(),
+	}
+	if msg.MessageType == RMHMessageTypeNack {
+		if info.Error == nil {
+			info.Error = i18n.NewError(ctx, msgs.MsgTransportNackMissingError)
+		}
+		ackNackToWrite.Error = info.Error.Error()
+	}
+	return ackNackToWrite
+}
+
+func parseStateDistribution(ctx context.Context, msgID uuid.UUID, data []byte) (sd *components.StateDistributionWithData, parsed *components.StateUpsertOutsideContext, err error) {
+	parsed = &components.StateUpsertOutsideContext{}
+	var contractAddr *tktypes.EthAddress
+	err = json.Unmarshal(data, &sd)
+	if err == nil {
+		parsed.ID, err = tktypes.ParseHexBytes(ctx, sd.StateID)
+	}
+	if err == nil {
+		parsed.SchemaID, err = tktypes.ParseBytes32(sd.SchemaID)
+	}
+	if err == nil {
+		contractAddr, err = tktypes.ParseEthAddress(sd.ContractAddress)
+	}
+	if err == nil {
+		parsed.ContractAddress = *contractAddr
+	}
+	if err != nil {
+		return nil, nil, i18n.WrapError(ctx, err, msgs.MsgTransportInvalidMessageData, msgID)
+	}
+	return
+}
diff --git a/core/go/internal/transportmgr/peer.go b/core/go/internal/transportmgr/peer.go
index 7fbf0981f..5d737fcef 100644
--- a/core/go/internal/transportmgr/peer.go
+++ b/core/go/internal/transportmgr/peer.go
@@ -196,7 +196,7 @@ func (p *peer) notifyPersistedMsgAvailable() {
 	}
 }
 
-func (p *peer) send(msg *prototk.PaladinMsg) error {
+func (p *peer) send(msg *prototk.PaladinMsg, reliableSeq *uint64) error {
 	err := p.tm.sendShortRetry.Do(p.ctx, func(attempt int) (retryable bool, err error) {
 		return true, p.transport.send(p.ctx, p.Name, msg)
 	})
@@ -207,10 +207,25 @@ func (p *peer) send(msg *prototk.PaladinMsg, reliableSeq *uint64) error {
 		p.Stats.LastSend = &now
 		p.Stats.SentMsgs++
 		p.Stats.SentBytes += uint64(len(msg.Payload))
+		if reliableSeq != nil && *reliableSeq > p.Stats.ReliableHighestSent {
+			p.Stats.ReliableHighestSent = *reliableSeq
+		}
+		if p.lastDrainHWM != nil {
+			p.Stats.ReliableAckBase = *p.lastDrainHWM
+		}
 	}
 	return err
 }
 
+func (p *peer) updateReceivedStats(msg *prototk.PaladinMsg) {
+	now := tktypes.TimestampNow()
+	p.statsLock.Lock()
+	defer p.statsLock.Unlock()
+	p.Stats.LastReceive = &now
+	p.Stats.ReceivedMsgs++
+	p.Stats.ReceivedBytes += uint64(len(msg.Payload))
+}
+
 func (p *peer) senderCleanup() {
 	p.deactivate()
@@ -319,28 +334,19 @@ func (p *peer) reliableMessageScan(checkNew bool) error {
 
 func (p *peer) buildStateDistributionMsg(rm *components.ReliableMessage) (*prototk.PaladinMsg, error, error) {
 
 	// Validate the message first (not retryable)
-	var sd components.StateDistributionWithData
-	var stateID tktypes.HexBytes
-	var contractAddr *tktypes.EthAddress
-	parseErr := json.Unmarshal(rm.Metadata, &sd)
-	if parseErr == nil {
-		stateID, parseErr = tktypes.ParseHexBytes(p.ctx, sd.StateID)
-	}
-	if parseErr == nil {
-		contractAddr, parseErr = tktypes.ParseEthAddress(sd.ContractAddress)
-	}
+	sd, parsed, parseErr := parseStateDistribution(p.ctx, rm.ID, rm.Metadata)
 	if parseErr != nil {
-		return nil, i18n.WrapError(p.ctx, parseErr, msgs.MsgTransportInvalidMessageData, rm.ID), nil
+		return nil, parseErr, nil
 	}
 
 	// Get the state - distinguishing between not found, vs.
a retryable error - state, err := p.tm.stateManager.GetState(p.ctx, p.tm.persistence.DB(), sd.Domain, *contractAddr, stateID, false, false) + state, err := p.tm.stateManager.GetState(p.ctx, p.tm.persistence.DB(), sd.Domain, parsed.ContractAddress, parsed.ID, false, false) if err != nil { return nil, nil, err } if state == nil { return nil, - i18n.NewError(p.ctx, msgs.MsgTransportStateNotAvailableLocally, sd.Domain, *contractAddr, stateID), + i18n.NewError(p.ctx, msgs.MsgTransportStateNotAvailableLocally, sd.Domain, parsed.ContractAddress, parsed.ID), nil } sd.StateData = state.Data @@ -348,15 +354,20 @@ func (p *peer) buildStateDistributionMsg(rm *components.ReliableMessage) (*proto return &prototk.PaladinMsg{ MessageId: rm.ID.String(), Component: prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER, - MessageType: string(rm.MessageType), + MessageType: RMHMessageTypeStateDistribution, Payload: tktypes.JSONString(sd), }, nil, nil } func (p *peer) processReliableMsgPage(page []*components.ReliableMessage) (err error) { + type paladinMsgWithSeq struct { + *prototk.PaladinMsg + seq uint64 + } + // Build the messages - msgsToSend := make([]*prototk.PaladinMsg, 0, len(page)) + msgsToSend := make([]paladinMsgWithSeq, 0, len(page)) var errorAcks []*components.ReliableMessageAck for _, rm := range page { @@ -390,7 +401,10 @@ func (p *peer) processReliableMsgPage(page []*components.ReliableMessage) (err e Error: errorAck.Error(), }) case msg != nil: - msgsToSend = append(msgsToSend, msg) + msgsToSend = append(msgsToSend, paladinMsgWithSeq{ + seq: rm.Sequence, + PaladinMsg: msg, + }) } } @@ -410,7 +424,7 @@ func (p *peer) processReliableMsgPage(page []*components.ReliableMessage) (err e // We fail the whole page on error, so we don't thrash (the outer infinite retry // gives a much longer maximum back-off). for _, msg := range msgsToSend { - if err := p.send(msg); err != nil { + if err := p.send(msg.PaladinMsg, &msg.seq); err != nil { return err } } @@ -465,7 +479,7 @@ func (p *peer) sender() { case msg := <-p.sendQueue: resendTimer.Stop() // send and spin straight round - if err := p.send(msg); err != nil { + if err := p.send(msg, nil); err != nil { log.L(p.ctx).Errorf("failed to send message '%s' after short retry (discarding): %s", msg.MessageId, err) } } diff --git a/core/go/internal/transportmgr/peer_test.go b/core/go/internal/transportmgr/peer_test.go index ad63fd457..e22bae588 100644 --- a/core/go/internal/transportmgr/peer_test.go +++ b/core/go/internal/transportmgr/peer_test.go @@ -39,14 +39,13 @@ import ( "gorm.io/gorm" ) -func mockGetStateRetryThenOk(mc *mockComponents) components.TransportClient { +func mockGetStateRetryThenOk(mc *mockComponents) { mc.stateManager.On("GetState", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, false, false). 
Return(nil, fmt.Errorf("pop")).Once() mockGetStateOk(mc) - return nil } -func mockGetStateOk(mc *mockComponents) components.TransportClient { +func mockGetStateOk(mc *mockComponents) { mGS := mc.stateManager.On("GetState", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, false, false) mGS.Run(func(args mock.Arguments) { mGS.Return(&pldapi.State{ @@ -58,7 +57,6 @@ func mockGetStateOk(mc *mockComponents) components.TransportClient { }, }, nil) }) - return nil } func TestReliableMessageResendRealDB(t *testing.T) { @@ -96,6 +94,7 @@ func TestReliableMessageResendRealDB(t *testing.T) { sds[i] = &components.StateDistribution{ Domain: "domain1", ContractAddress: tktypes.RandAddress().String(), + SchemaID: tktypes.RandHex(32), StateID: tktypes.RandHex(32), } @@ -174,6 +173,7 @@ func TestReliableMessageSendSendQuiesceRealDB(t *testing.T) { sd := &components.StateDistribution{ Domain: "domain1", ContractAddress: tktypes.RandAddress().String(), + SchemaID: tktypes.RandHex(32), StateID: tktypes.RandHex(32), } @@ -216,11 +216,10 @@ func TestSendBadReliableMessageMarkedFailRealDB(t *testing.T) { ctx, tm, tp, done := newTestTransport(t, true, mockGoodTransport, - func(mc *mockComponents) components.TransportClient { + func(mc *mockComponents) { // missing state mc.stateManager.On("GetState", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, false, false). Return(nil, nil).Once() - return nil }, ) defer done() @@ -250,6 +249,7 @@ func TestSendBadReliableMessageMarkedFailRealDB(t *testing.T) { Metadata: tktypes.JSONString(&components.StateDistribution{ Domain: "domain1", ContractAddress: tktypes.RandAddress().String(), + SchemaID: tktypes.RandHex(32), StateID: tktypes.RandHex(32), }), } @@ -301,7 +301,7 @@ func TestConnectionRace(t *testing.T) { connRelease := make(chan struct{}) ctx, tm, tp, done := newTestTransport(t, false, - func(mc *mockComponents) components.TransportClient { + func(mc *mockComponents) { mGNT := mc.registryManager.On("GetNodeTransports", mock.Anything, "node2").Return([]*components.RegistryNodeTransportEntry{ { Node: "node2", @@ -313,7 +313,6 @@ func TestConnectionRace(t *testing.T) { close(connWaiting) <-connRelease }) - return nil }, ) defer done() @@ -372,7 +371,7 @@ func TestQuiesceDetectPersistentMessage(t *testing.T) { // Load up a notification for a persistent message tm.reliableMessageResend = 10 * time.Millisecond tm.peerInactivityTimeout = 1 * time.Second - tm.quiesceTimeout = 1 * time.Second + tm.quiesceTimeout = 1 * time.Millisecond mockActivateDeactivateOk(tp) @@ -408,7 +407,7 @@ func TestQuiesceDetectFireAndForgetMessage(t *testing.T) { // Load up a notification for a persistent message tm.reliableMessageResend = 1 * time.Second tm.peerInactivityTimeout = 1 * time.Second - tm.quiesceTimeout = 1 * time.Second + tm.quiesceTimeout = 1 * time.Millisecond mockActivateDeactivateOk(tp) @@ -473,9 +472,8 @@ func TestDeactivateFail(t *testing.T) { func TestGetReliableMessageByIDFail(t *testing.T) { - ctx, tm, _, done := newTestTransport(t, false, func(mc *mockComponents) components.TransportClient { + ctx, tm, _, done := newTestTransport(t, false, func(mc *mockComponents) { mc.db.Mock.ExpectQuery("SELECT.*reliable_msgs").WillReturnError(fmt.Errorf("pop")) - return nil }) defer done() @@ -525,9 +523,8 @@ func TestProcessReliableMsgPageIgnoreBeforeHWM(t *testing.T) { func TestProcessReliableMsgPageIgnoreUnsupported(t *testing.T) { - ctx, tm, _, done := newTestTransport(t, false, func(mc *mockComponents) components.TransportClient { 
+ ctx, tm, _, done := newTestTransport(t, false, func(mc *mockComponents) { mc.db.Mock.ExpectExec("INSERT.*reliable_msg_acks").WillReturnError(fmt.Errorf("pop")) - return nil }) defer done() @@ -552,9 +549,8 @@ func TestProcessReliableMsgPageInsertFail(t *testing.T) { ctx, tm, tp, done := newTestTransport(t, false, mockGetStateOk, - func(mc *mockComponents) components.TransportClient { + func(mc *mockComponents) { mc.db.Mock.ExpectExec("INSERT.*reliable_msgs").WillReturnResult(driver.ResultNoRows) - return nil }) defer done() @@ -567,6 +563,7 @@ func TestProcessReliableMsgPageInsertFail(t *testing.T) { sd := &components.StateDistribution{ Domain: "domain1", ContractAddress: tktypes.RandAddress().String(), + SchemaID: tktypes.RandHex(32), StateID: tktypes.RandHex(32), } diff --git a/core/go/internal/transportmgr/transport.go b/core/go/internal/transportmgr/transport.go index 35a6b903e..41327796f 100644 --- a/core/go/internal/transportmgr/transport.go +++ b/core/go/internal/transportmgr/transport.go @@ -151,30 +151,41 @@ func (t *transport) ReceiveMessage(ctx context.Context, req *prototk.ReceiveMess } } - log.L(ctx).Debugf("transport %s message received id=%s (cid=%s)", t.name, msgID, tktypes.StrOrEmpty(msg.CorrelationId)) + p, err := t.tm.getPeer(ctx, req.FromNode, false /* we do not require a connection for sending here */) + if err != nil { + return nil, err + } + + log.L(ctx).Debugf("transport %s message received from %s id=%s (cid=%s)", t.name, p.Name, msgID, tktypes.StrOrEmpty(msg.CorrelationId)) if log.IsTraceEnabled() { log.L(ctx).Tracef("transport %s message received: %s", t.name, protoToJSON(msg)) } - if err = t.deliverMessage(ctx, msg.Component, msg); err != nil { + if err := t.deliverMessage(ctx, p, msgID, msg); err != nil { return nil, err } return &prototk.ReceiveMessageResponse{}, nil } -func (t *transport) deliverMessage(ctx context.Context, component prototk.PaladinMsg_Component, msg *prototk.PaladinMsg) error { - t.tm.destinationsMux.RLock() - defer t.tm.destinationsMux.RUnlock() +func (t *transport) deliverMessage(ctx context.Context, p *peer, msgID uuid.UUID, msg *prototk.PaladinMsg) error { - // TODO: Reconcile why we're using the identity as the component routing location - Broadhurst/Hosie discussion required - receiver, found := t.tm.components[component] - if !found { - log.L(ctx).Errorf("Component not found: %s", component) - return i18n.NewError(ctx, msgs.MsgTransportComponentNotFound, component.String()) + switch msg.Component { + case prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER: + _ = t.tm.reliableMsgWriter.Queue(ctx, &reliableMsgOp{ + p: p, + msgID: msgID, + msg: msg, + }) + case prototk.PaladinMsg_TRANSACTION_ENGINE: + t.tm.privateTxManager.HandlePaladinMsg(ctx, msg) + case prototk.PaladinMsg_IDENTITY_RESOLVER: + t.tm.identityResolver.HandlePaladinMsg(ctx, msg) + default: + log.L(ctx).Errorf("Component not found for message '%s': %s", msgID, msg.Component) + return i18n.NewError(ctx, msgs.MsgTransportComponentNotFound, msg.Component.String()) } - receiver.HandlePaladinMsg(ctx, msg) return nil } diff --git a/core/go/internal/transportmgr/transport_test.go b/core/go/internal/transportmgr/transport_test.go index a27be54e2..9a2d7f21d 100644 --- a/core/go/internal/transportmgr/transport_test.go +++ b/core/go/internal/transportmgr/transport_test.go @@ -27,7 +27,6 @@ import ( "github.com/kaleido-io/paladin/config/pkg/confutil" "github.com/kaleido-io/paladin/config/pkg/pldconf" "github.com/kaleido-io/paladin/core/internal/components" - 
"github.com/kaleido-io/paladin/core/mocks/componentmocks" "github.com/kaleido-io/paladin/toolkit/pkg/plugintk" "github.com/kaleido-io/paladin/toolkit/pkg/prototk" "github.com/stretchr/testify/assert" @@ -53,7 +52,7 @@ func newTestPlugin(transportFuncs *plugintk.TransportAPIFunctions) *testPlugin { } } -func newTestTransport(t *testing.T, realDB bool, extraSetup ...func(mc *mockComponents) components.TransportClient) (context.Context, *transportManager, *testPlugin, func()) { +func newTestTransport(t *testing.T, realDB bool, extraSetup ...func(mc *mockComponents)) (context.Context, *transportManager, *testPlugin, func()) { ctx, tm, _, done := newTestTransportManager(t, realDB, &pldconf.TransportManagerConfig{ NodeName: "node1", @@ -120,10 +119,9 @@ func testMessage() *components.FireAndForgetMessageSend { } } -func mockEmptyReliableMsgs(mc *mockComponents) components.TransportClient { +func mockEmptyReliableMsgs(mc *mockComponents) { mc.db.Mock.ExpectQuery("SELECT.*reliable_msgs").WillReturnRows(sqlmock.NewRows([]string{})) mc.db.Mock.MatchExpectationsInOrder(false) - return nil } func mockActivateDeactivateOk(tp *testPlugin) { @@ -135,7 +133,7 @@ func mockActivateDeactivateOk(tp *testPlugin) { } } -func mockGoodTransport(mc *mockComponents) components.TransportClient { +func mockGoodTransport(mc *mockComponents) { mc.registryManager.On("GetNodeTransports", mock.Anything, "node2").Return([]*components.RegistryNodeTransportEntry{ { Node: "node2", @@ -143,7 +141,6 @@ func mockGoodTransport(mc *mockComponents) components.TransportClient { Details: `{"likely":"json stuff"}`, }, }, nil) - return nil } func TestSendMessage(t *testing.T) { @@ -174,7 +171,7 @@ func TestSendMessage(t *testing.T) { func TestSendMessageNotInit(t *testing.T) { ctx, tm, tp, done := newTestTransport(t, false, mockEmptyReliableMsgs, - func(mc *mockComponents) components.TransportClient { + func(mc *mockComponents) { mc.registryManager.On("GetNodeTransports", mock.Anything, "node2").Return([]*components.RegistryNodeTransportEntry{ { Node: "node1", @@ -182,7 +179,6 @@ func TestSendMessageNotInit(t *testing.T) { Details: `{"likely":"json stuff"}`, }, }, nil) - return nil }) defer done() @@ -199,7 +195,7 @@ func TestSendMessageNotInit(t *testing.T) { func TestSendMessageFail(t *testing.T) { ctx, tm, tp, done := newTestTransport(t, false, mockEmptyReliableMsgs, - func(mc *mockComponents) components.TransportClient { + func(mc *mockComponents) { mc.registryManager.On("GetNodeTransports", mock.Anything, "node2").Return([]*components.RegistryNodeTransportEntry{ { Node: "node1", @@ -207,7 +203,6 @@ func TestSendMessageFail(t *testing.T) { Details: `{"likely":"json stuff"}`, }, }, nil) - return nil }) defer done() @@ -227,9 +222,8 @@ func TestSendMessageFail(t *testing.T) { } func TestSendMessageDestNotFound(t *testing.T) { - ctx, tm, _, done := newTestTransport(t, false, func(mc *mockComponents) components.TransportClient { + ctx, tm, _, done := newTestTransport(t, false, func(mc *mockComponents) { mc.registryManager.On("GetNodeTransports", mock.Anything, "node2").Return(nil, fmt.Errorf("not found")) - return nil }) defer done() @@ -241,7 +235,7 @@ func TestSendMessageDestNotFound(t *testing.T) { } func TestSendMessageDestNotAvailable(t *testing.T) { - ctx, tm, tp, done := newTestTransport(t, false, func(mc *mockComponents) components.TransportClient { + ctx, tm, tp, done := newTestTransport(t, false, func(mc *mockComponents) { mc.registryManager.On("GetNodeTransports", mock.Anything, 
"node2").Return([]*components.RegistryNodeTransportEntry{ { Node: "node1", @@ -249,7 +243,6 @@ func TestSendMessageDestNotAvailable(t *testing.T) { Details: `{"not":"the stuff we need"}`, }, }, nil) - return nil }) defer done() @@ -271,7 +264,7 @@ func TestSendMessageDestNotAvailable(t *testing.T) { } func TestGetTransportDetailsOk(t *testing.T) { - ctx, _, tp, done := newTestTransport(t, false, func(mc *mockComponents) components.TransportClient { + ctx, _, tp, done := newTestTransport(t, false, func(mc *mockComponents) { mc.registryManager.On("GetNodeTransports", mock.Anything, "node2").Return([]*components.RegistryNodeTransportEntry{ { Node: "node1", @@ -279,7 +272,6 @@ func TestGetTransportDetailsOk(t *testing.T) { Details: `{"the":"stuff we need"}`, }, }, nil) - return nil }) defer done() @@ -322,13 +314,10 @@ func TestSendInvalidMessageNoPayload(t *testing.T) { func TestReceiveMessage(t *testing.T) { receivedMessages := make(chan *prototk.PaladinMsg, 1) - ctx, _, tp, done := newTestTransport(t, false, func(mc *mockComponents) components.TransportClient { - receivingClient := componentmocks.NewTransportClient(t) - receivingClient.On("Destination").Return(prototk.PaladinMsg_TRANSACTION_ENGINE) - receivingClient.On("HandlePaladinMsg", mock.Anything, mock.Anything).Return().Run(func(args mock.Arguments) { + ctx, _, tp, done := newTestTransport(t, false, func(mc *mockComponents) { + mc.privateTxManager.On("HandlePaladinMsg", mock.Anything, mock.Anything).Return().Run(func(args mock.Arguments) { receivedMessages <- args[1].(*prototk.PaladinMsg) }) - return receivingClient }) defer done() @@ -341,7 +330,8 @@ func TestReceiveMessage(t *testing.T) { } rmr, err := tp.t.ReceiveMessage(ctx, &prototk.ReceiveMessageRequest{ - Message: msg, + FromNode: "node2", + Message: msg, }) require.NoError(t, err) assert.NotNil(t, rmr) @@ -349,25 +339,7 @@ func TestReceiveMessage(t *testing.T) { <-receivedMessages } -func TestReceiveMessageNoReceiver(t *testing.T) { - ctx, _, tp, done := newTestTransport(t, false) - defer done() - - msg := &prototk.PaladinMsg{ - MessageId: uuid.NewString(), - CorrelationId: confutil.P(uuid.NewString()), - Component: prototk.PaladinMsg_TRANSACTION_ENGINE, - MessageType: "myMessageType", - Payload: []byte("some data"), - } - - _, err := tp.t.ReceiveMessage(ctx, &prototk.ReceiveMessageRequest{ - Message: msg, - }) - require.Regexp(t, "PD012011", err) -} - -func TestReceiveMessageInvalidDestination(t *testing.T) { +func TestReceiveMessageInvalidComponent(t *testing.T) { ctx, _, tp, done := newTestTransport(t, false) defer done() @@ -380,7 +352,8 @@ func TestReceiveMessageInvalidDestination(t *testing.T) { } _, err := tp.t.ReceiveMessage(ctx, &prototk.ReceiveMessageRequest{ - Message: msg, + FromNode: "node2", + Message: msg, }) require.Regexp(t, "PD012011", err) } @@ -426,7 +399,8 @@ func TestReceiveMessageBadDestination(t *testing.T) { Payload: []byte("some data"), } _, err := tp.t.ReceiveMessage(ctx, &prototk.ReceiveMessageRequest{ - Message: msg, + FromNode: "node2", + Message: msg, }) assert.Regexp(t, "PD012011", err) } @@ -484,9 +458,8 @@ func TestSendContextClosed(t *testing.T) { func TestSendReliableOk(t *testing.T) { ctx, tm, tp, done := newTestTransport(t, false, mockGoodTransport, - func(mc *mockComponents) components.TransportClient { + func(mc *mockComponents) { mc.db.Mock.ExpectExec("INSERT.*reliable_msgs").WillReturnResult(driver.ResultNoRows) - return nil }, ) defer done() @@ -505,9 +478,8 @@ func TestSendReliableOk(t *testing.T) { func TestSendReliableFail(t 
*testing.T) { ctx, tm, tp, done := newTestTransport(t, false, mockGoodTransport, - func(mc *mockComponents) components.TransportClient { + func(mc *mockComponents) { mc.db.Mock.ExpectExec("INSERT.*reliable_msgs").WillReturnError(fmt.Errorf("pop")) - return nil }, ) defer done() diff --git a/toolkit/go/pkg/pldapi/peerinfo.go b/toolkit/go/pkg/pldapi/peerinfo.go index d170ae823..6b0e010f3 100644 --- a/toolkit/go/pkg/pldapi/peerinfo.go +++ b/toolkit/go/pkg/pldapi/peerinfo.go @@ -27,11 +27,12 @@ type PeerInfo struct { } type PeerStats struct { - QueuedMsgs uint64 `docstruct:"PeerStats" json:"queuedMsgs,omitempty"` - SentMsgs uint64 `docstruct:"PeerStats" json:"sentMsgs,omitempty"` - ReceivedMsgs uint64 `docstruct:"PeerStats" json:"receivedMsgs,omitempty"` - SentBytes uint64 `docstruct:"PeerStats" json:"sentBytes,omitempty"` - ReceivedBytes uint64 `docstruct:"PeerStats" json:"receivedBytes,omitempty"` - LastSend *tktypes.Timestamp `docstruct:"PeerStats" json:"lastSend,omitempty"` - LastReceive *tktypes.Timestamp `docstruct:"PeerStats" json:"lastReceive,omitempty"` + SentMsgs uint64 `docstruct:"PeerStats" json:"sentMsgs"` + ReceivedMsgs uint64 `docstruct:"PeerStats" json:"receivedMsgs"` + SentBytes uint64 `docstruct:"PeerStats" json:"sentBytes"` + ReceivedBytes uint64 `docstruct:"PeerStats" json:"receivedBytes"` + LastSend *tktypes.Timestamp `docstruct:"PeerStats" json:"lastSend"` + LastReceive *tktypes.Timestamp `docstruct:"PeerStats" json:"lastReceive"` + ReliableHighestSent uint64 `docstruct:"PeerStats" json:"reliableHighestSent"` + ReliableAckBase uint64 `docstruct:"PeerStats" json:"reliableAckBase"` } diff --git a/toolkit/proto/protos/to_transport.proto b/toolkit/proto/protos/to_transport.proto index 116a7ee32..967698d8e 100644 --- a/toolkit/proto/protos/to_transport.proto +++ b/toolkit/proto/protos/to_transport.proto @@ -60,6 +60,7 @@ message PaladinMsg { enum Component { TRANSACTION_ENGINE = 0; RELIABLE_MESSAGE_HANDLER = 1; + IDENTITY_RESOLVER = 2; } string message_id = 1; // UUID individually allocated to each message optional string correlation_id = 2; // optional correlation ID to relate "replies" back to original message IDs From ffb4d12aa0e72462d84f1c2f4ac3dc228d0f92f9 Mon Sep 17 00:00:00 2001 From: Peter Broadhurst Date: Thu, 2 Jan 2025 19:49:55 -0500 Subject: [PATCH 17/41] Peer inactivity reaper Signed-off-by: Peter Broadhurst --- config/pkg/pldconf/transportmgr.go | 2 + core/go/internal/components/transportmgr.go | 3 -- .../identityresolver/transport_client.go | 8 ---- .../transport_client.go | 6 --- .../privatetxnmgr/transport_receiver.go | 4 -- core/go/internal/transportmgr/manager.go | 44 ++++++++++++++++--- core/go/internal/transportmgr/manager_test.go | 4 +- core/go/internal/transportmgr/peer.go | 9 ++++ core/go/internal/transportmgr/peer_test.go | 19 ++++---- .../internal/transportmgr/transport_test.go | 27 ++++++------ 10 files changed, 77 insertions(+), 49 deletions(-) diff --git a/config/pkg/pldconf/transportmgr.go b/config/pkg/pldconf/transportmgr.go index 3f9e17cd7..04d2882d5 100644 --- a/config/pkg/pldconf/transportmgr.go +++ b/config/pkg/pldconf/transportmgr.go @@ -20,6 +20,7 @@ type TransportManagerConfig struct { NodeName string `json:"nodeName"` SendQueueLen *int `json:"sendQueueLen"` PeerInactivityTimeout *string `json:"peerInactivityTimeout"` + PeerReaperInterval *string `json:"peerReaperInterval"` SendRetry RetryConfigWithMax `json:"sendRetry"` ReliableScanRetry RetryConfig `json:"reliableScanRetry"` ReliableMessageResend *string `json:"reliableMessageResend"` @@ 
-35,6 +36,7 @@ var TransportManagerDefaults = &TransportManagerConfig{ SendQueueLen: confutil.P(10), ReliableMessageResend: confutil.P("30s"), PeerInactivityTimeout: confutil.P("1m"), + PeerReaperInterval: confutil.P("30s"), ReliableScanRetry: GenericRetryDefaults.RetryConfig, // SendRetry defaults are deliberately short SendRetry: RetryConfigWithMax{ diff --git a/core/go/internal/components/transportmgr.go b/core/go/internal/components/transportmgr.go index 5d847ceb7..f2f7a83c8 100644 --- a/core/go/internal/components/transportmgr.go +++ b/core/go/internal/components/transportmgr.go @@ -84,9 +84,6 @@ type TransportManagerToTransport interface { // TransportClient is the interface for a component that can receive messages from the transport manager type TransportClient interface { - // Destination returns a string that should be matched with the Destination field of incomming messages to be routed to this client - Destination() prototk.PaladinMsg_Component - // This function is used by the transport manager to deliver messages to the engine. // // The implementation of this function: diff --git a/core/go/internal/identityresolver/transport_client.go b/core/go/internal/identityresolver/transport_client.go index ba5e246ea..ffc1180d2 100644 --- a/core/go/internal/identityresolver/transport_client.go +++ b/core/go/internal/identityresolver/transport_client.go @@ -23,14 +23,6 @@ import ( "github.com/kaleido-io/paladin/toolkit/pkg/log" ) -// If we had lots of these we would probably want to centralise the assignment of the constants to avoid duplication -// but currently there is only 2 ( the other being PRIVATE_TX_MANAGER_DESTINATION ) -const IDENTITY_RESOLVER_DESTINATION = "identity-resolver-manager" - -func (p *identityResolver) Destination() string { - return IDENTITY_RESOLVER_DESTINATION -} - func (ir *identityResolver) HandlePaladinMsg(ctx context.Context, message *components.TransportMessage) { //TODO this need to become an ultra low latency, non blocking, handover to the event loop thread. // need some thought on how to handle errors, retries, buffering, swapping idle sequencers in and out of memory etc... 
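With Destination() removed from the TransportClient interface, routing is decided by the transport manager's switch on the prototk.PaladinMsg_Component enum (the deliverMessage change earlier in this series), so a client now only implements HandlePaladinMsg. A minimal sketch of what is left of a client, assuming the interface shape at this point in the series (the echoClient name and log line are illustrative, not part of the patch):

package example

import (
	"context"

	"github.com/kaleido-io/paladin/core/internal/components"
	"github.com/kaleido-io/paladin/toolkit/pkg/log"
)

// echoClient is a hypothetical TransportClient - no Destination() method any more,
// as the transport manager routes to it based on the message's Component enum.
type echoClient struct{}

// HandlePaladinMsg is called on the transport manager's delivery path, so it should
// hand the message over to the component's own processing without blocking.
func (c *echoClient) HandlePaladinMsg(ctx context.Context, message *components.TransportMessage) {
	log.L(ctx).Debugf("received %s message from node %s", message.MessageType, message.Node)
}
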
diff --git a/core/go/internal/preparedtxdistribution/transport_client.go b/core/go/internal/preparedtxdistribution/transport_client.go index 5354199d0..e23ab6ec3 100644 --- a/core/go/internal/preparedtxdistribution/transport_client.go +++ b/core/go/internal/preparedtxdistribution/transport_client.go @@ -26,12 +26,6 @@ import ( "google.golang.org/protobuf/proto" ) -const PREPARED_TRANSACTION_DISTRIBUTER_DESTINATION = "prepared-transaction-distributer" - -func (sd *preparedTransactionDistributer) Destination() string { - return PREPARED_TRANSACTION_DISTRIBUTER_DESTINATION -} - func (sd *preparedTransactionDistributer) HandlePaladinMsg(ctx context.Context, message *components.TransportMessage) { log.L(ctx).Debugf("preparedTransactionDistributer:HandlePaladinMsg") messagePayload := message.Payload diff --git a/core/go/internal/privatetxnmgr/transport_receiver.go b/core/go/internal/privatetxnmgr/transport_receiver.go index b42298e5d..4889d0d4a 100644 --- a/core/go/internal/privatetxnmgr/transport_receiver.go +++ b/core/go/internal/privatetxnmgr/transport_receiver.go @@ -22,10 +22,6 @@ import ( "github.com/kaleido-io/paladin/toolkit/pkg/log" ) -func (p *privateTxManager) Destination() string { - return components.PRIVATE_TX_MANAGER_DESTINATION -} - func (p *privateTxManager) HandlePaladinMsg(ctx context.Context, message *components.TransportMessage) { //TODO this need to become an ultra low latency, non blocking, handover to the event loop thread. // need some thought on how to handle errors, retries, buffering, swapping idle sequencers in and out of memory etc... diff --git a/core/go/internal/transportmgr/manager.go b/core/go/internal/transportmgr/manager.go index 4956aa7af..8d8d8d80b 100644 --- a/core/go/internal/transportmgr/manager.go +++ b/core/go/internal/transportmgr/manager.go @@ -40,8 +40,9 @@ import ( ) type transportManager struct { - bgCtx context.Context - mux sync.Mutex + bgCtx context.Context + cancelCtx context.CancelFunc + mux sync.Mutex rpcModule *rpcserver.RPCModule conf *pldconf.TransportManagerConfig @@ -56,8 +57,9 @@ type transportManager struct { transportsByID map[uuid.UUID]*transport transportsByName map[string]*transport - peersLock sync.RWMutex - peers map[string]*peer + peersLock sync.RWMutex + peers map[string]*peer + peerReaperDone chan struct{} reliableMsgWriter flushwriter.Writer[*reliableMsgOp, *noResult] @@ -65,6 +67,7 @@ type transportManager struct { reliableScanRetry *retry.Retry peerInactivityTimeout time.Duration quiesceTimeout time.Duration + peerReaperInterval time.Duration senderBufferLen int reliableMessageResend time.Duration @@ -73,7 +76,6 @@ type transportManager struct { func NewTransportManager(bgCtx context.Context, conf *pldconf.TransportManagerConfig) components.TransportManager { tm := &transportManager{ - bgCtx: bgCtx, conf: conf, localNodeName: conf.NodeName, transportsByID: make(map[uuid.UUID]*transport), @@ -84,9 +86,11 @@ func NewTransportManager(bgCtx context.Context, conf *pldconf.TransportManagerCo sendShortRetry: retry.NewRetryLimited(&conf.SendRetry, &pldconf.TransportManagerDefaults.SendRetry), reliableScanRetry: retry.NewRetryIndefinite(&conf.ReliableScanRetry, &pldconf.TransportManagerDefaults.ReliableScanRetry), peerInactivityTimeout: confutil.DurationMin(conf.PeerInactivityTimeout, 0, *pldconf.TransportManagerDefaults.PeerInactivityTimeout), + peerReaperInterval: confutil.DurationMin(conf.PeerReaperInterval, 100*time.Millisecond, *pldconf.TransportManagerDefaults.PeerReaperInterval), quiesceTimeout: 1 * time.Second, // not 
currently tunable (considered very small edge case) reliableMessagePageSize: 100, // not currently tunable } + tm.bgCtx, tm.cancelCtx = context.WithCancel(bgCtx) tm.reliableMsgWriter = flushwriter.NewWriter(bgCtx, tm.handleReliableMsgBatch, tm.persistence, &conf.ReliableMessageWriter, &pldconf.TransportManagerDefaults.ReliableMessageWriter) return tm @@ -116,6 +120,8 @@ func (tm *transportManager) PostInit(c components.AllComponents) error { } func (tm *transportManager) Start() error { + tm.peerReaperDone = make(chan struct{}) + go tm.peerReaper() return nil } @@ -136,6 +142,11 @@ func (tm *transportManager) Stop() { tm.cleanupTransport(t) } + tm.cancelCtx() + if tm.peerReaperDone != nil { + <-tm.peerReaperDone + } + } func (tm *transportManager) cleanupTransport(t *transport) { @@ -291,6 +302,29 @@ func (tm *transportManager) SendReliable(ctx context.Context, dbTX *gorm.DB, msg } +func (tm *transportManager) peerReaper() { + defer close(tm.peerReaperDone) + + for { + select { + case <-tm.bgCtx.Done(): + log.L(tm.bgCtx).Debugf("peer reaper exiting") + return + case <-time.After(tm.peerReaperInterval): + } + + candidates := tm.listActivePeers() + var reaped []*peer + for _, p := range candidates { + if p.isInactive() { + reaped = append(reaped, p) + p.close() + } + } + log.L(tm.bgCtx).Debugf("peer reaper before=%d reaped=%d", len(candidates), len(reaped)) + } +} + func (tm *transportManager) writeAcks(ctx context.Context, dbTX *gorm.DB, acks ...*components.ReliableMessageAck) error { for _, ack := range acks { log.L(ctx).Infof("ack received for message %s", ack.MessageID) diff --git a/core/go/internal/transportmgr/manager_test.go b/core/go/internal/transportmgr/manager_test.go index 59e1995fe..61bf98e20 100644 --- a/core/go/internal/transportmgr/manager_test.go +++ b/core/go/internal/transportmgr/manager_test.go @@ -72,14 +72,14 @@ func newMockComponents(t *testing.T, realDB bool) *mockComponents { return mc } -func newTestTransportManager(t *testing.T, realDB bool, conf *pldconf.TransportManagerConfig, extraSetup ...func(mc *mockComponents)) (context.Context, *transportManager, *mockComponents, func()) { +func newTestTransportManager(t *testing.T, realDB bool, conf *pldconf.TransportManagerConfig, extraSetup ...func(mc *mockComponents, conf *pldconf.TransportManagerConfig)) (context.Context, *transportManager, *mockComponents, func()) { ctx, cancelCtx := context.WithCancel(context.Background()) oldLevel := logrus.GetLevel() logrus.SetLevel(logrus.TraceLevel) mc := newMockComponents(t, realDB) for _, fn := range extraSetup { - fn(mc) + fn(mc, conf) } tm := NewTransportManager(ctx, conf) diff --git a/core/go/internal/transportmgr/peer.go b/core/go/internal/transportmgr/peer.go index 5d737fcef..93a568abb 100644 --- a/core/go/internal/transportmgr/peer.go +++ b/core/go/internal/transportmgr/peer.go @@ -487,6 +487,15 @@ func (p *peer) sender() { } } +func (p *peer) isInactive() bool { + p.statsLock.Lock() + defer p.statsLock.Unlock() + + now := time.Now() + return (p.Stats.LastSend == nil || now.Sub(p.Stats.LastSend.Time()) > p.tm.peerInactivityTimeout) && + (p.Stats.LastReceive == nil || now.Sub(p.Stats.LastReceive.Time()) > p.tm.peerInactivityTimeout) +} + func (p *peer) close() { p.cancelCtx() if p.senderStarted.Load() { diff --git a/core/go/internal/transportmgr/peer_test.go b/core/go/internal/transportmgr/peer_test.go index e22bae588..cc53ae62f 100644 --- a/core/go/internal/transportmgr/peer_test.go +++ b/core/go/internal/transportmgr/peer_test.go @@ -39,13 +39,13 @@ import ( 
"gorm.io/gorm" ) -func mockGetStateRetryThenOk(mc *mockComponents) { +func mockGetStateRetryThenOk(mc *mockComponents, conf *pldconf.TransportManagerConfig) { mc.stateManager.On("GetState", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, false, false). Return(nil, fmt.Errorf("pop")).Once() - mockGetStateOk(mc) + mockGetStateOk(mc, conf) } -func mockGetStateOk(mc *mockComponents) { +func mockGetStateOk(mc *mockComponents, conf *pldconf.TransportManagerConfig) { mGS := mc.stateManager.On("GetState", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, false, false) mGS.Run(func(args mock.Arguments) { mGS.Return(&pldapi.State{ @@ -144,6 +144,9 @@ func TestReliableMessageResendRealDB(t *testing.T) { func TestReliableMessageSendSendQuiesceRealDB(t *testing.T) { ctx, tm, tp, done := newTestTransport(t, true, + func(mc *mockComponents, conf *pldconf.TransportManagerConfig) { + conf.PeerReaperInterval = confutil.P("50ms") + }, mockGoodTransport, mockGetStateOk, ) @@ -216,7 +219,7 @@ func TestSendBadReliableMessageMarkedFailRealDB(t *testing.T) { ctx, tm, tp, done := newTestTransport(t, true, mockGoodTransport, - func(mc *mockComponents) { + func(mc *mockComponents, conf *pldconf.TransportManagerConfig) { // missing state mc.stateManager.On("GetState", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, false, false). Return(nil, nil).Once() @@ -301,7 +304,7 @@ func TestConnectionRace(t *testing.T) { connRelease := make(chan struct{}) ctx, tm, tp, done := newTestTransport(t, false, - func(mc *mockComponents) { + func(mc *mockComponents, conf *pldconf.TransportManagerConfig) { mGNT := mc.registryManager.On("GetNodeTransports", mock.Anything, "node2").Return([]*components.RegistryNodeTransportEntry{ { Node: "node2", @@ -472,7 +475,7 @@ func TestDeactivateFail(t *testing.T) { func TestGetReliableMessageByIDFail(t *testing.T) { - ctx, tm, _, done := newTestTransport(t, false, func(mc *mockComponents) { + ctx, tm, _, done := newTestTransport(t, false, func(mc *mockComponents, conf *pldconf.TransportManagerConfig) { mc.db.Mock.ExpectQuery("SELECT.*reliable_msgs").WillReturnError(fmt.Errorf("pop")) }) defer done() @@ -523,7 +526,7 @@ func TestProcessReliableMsgPageIgnoreBeforeHWM(t *testing.T) { func TestProcessReliableMsgPageIgnoreUnsupported(t *testing.T) { - ctx, tm, _, done := newTestTransport(t, false, func(mc *mockComponents) { + ctx, tm, _, done := newTestTransport(t, false, func(mc *mockComponents, conf *pldconf.TransportManagerConfig) { mc.db.Mock.ExpectExec("INSERT.*reliable_msg_acks").WillReturnError(fmt.Errorf("pop")) }) defer done() @@ -549,7 +552,7 @@ func TestProcessReliableMsgPageInsertFail(t *testing.T) { ctx, tm, tp, done := newTestTransport(t, false, mockGetStateOk, - func(mc *mockComponents) { + func(mc *mockComponents, conf *pldconf.TransportManagerConfig) { mc.db.Mock.ExpectExec("INSERT.*reliable_msgs").WillReturnResult(driver.ResultNoRows) }) defer done() diff --git a/core/go/internal/transportmgr/transport_test.go b/core/go/internal/transportmgr/transport_test.go index 9a2d7f21d..33046a098 100644 --- a/core/go/internal/transportmgr/transport_test.go +++ b/core/go/internal/transportmgr/transport_test.go @@ -52,16 +52,17 @@ func newTestPlugin(transportFuncs *plugintk.TransportAPIFunctions) *testPlugin { } } -func newTestTransport(t *testing.T, realDB bool, extraSetup ...func(mc *mockComponents)) (context.Context, *transportManager, *testPlugin, func()) { +func newTestTransport(t *testing.T, realDB bool, 
extraSetup ...func(mc *mockComponents, conf *pldconf.TransportManagerConfig)) (context.Context, *transportManager, *testPlugin, func()) { - ctx, tm, _, done := newTestTransportManager(t, realDB, &pldconf.TransportManagerConfig{ + conf := &pldconf.TransportManagerConfig{ NodeName: "node1", Transports: map[string]*pldconf.TransportConfig{ "test1": { Config: map[string]any{"some": "conf"}, }, }, - }, extraSetup...) + } + ctx, tm, _, done := newTestTransportManager(t, realDB, conf, extraSetup...) tp := newTestPlugin(nil) tp.Functions = &plugintk.TransportAPIFunctions{ @@ -119,7 +120,7 @@ func testMessage() *components.FireAndForgetMessageSend { } } -func mockEmptyReliableMsgs(mc *mockComponents) { +func mockEmptyReliableMsgs(mc *mockComponents, conf *pldconf.TransportManagerConfig) { mc.db.Mock.ExpectQuery("SELECT.*reliable_msgs").WillReturnRows(sqlmock.NewRows([]string{})) mc.db.Mock.MatchExpectationsInOrder(false) } @@ -133,7 +134,7 @@ func mockActivateDeactivateOk(tp *testPlugin) { } } -func mockGoodTransport(mc *mockComponents) { +func mockGoodTransport(mc *mockComponents, conf *pldconf.TransportManagerConfig) { mc.registryManager.On("GetNodeTransports", mock.Anything, "node2").Return([]*components.RegistryNodeTransportEntry{ { Node: "node2", @@ -171,7 +172,7 @@ func TestSendMessage(t *testing.T) { func TestSendMessageNotInit(t *testing.T) { ctx, tm, tp, done := newTestTransport(t, false, mockEmptyReliableMsgs, - func(mc *mockComponents) { + func(mc *mockComponents, conf *pldconf.TransportManagerConfig) { mc.registryManager.On("GetNodeTransports", mock.Anything, "node2").Return([]*components.RegistryNodeTransportEntry{ { Node: "node1", @@ -195,7 +196,7 @@ func TestSendMessageNotInit(t *testing.T) { func TestSendMessageFail(t *testing.T) { ctx, tm, tp, done := newTestTransport(t, false, mockEmptyReliableMsgs, - func(mc *mockComponents) { + func(mc *mockComponents, conf *pldconf.TransportManagerConfig) { mc.registryManager.On("GetNodeTransports", mock.Anything, "node2").Return([]*components.RegistryNodeTransportEntry{ { Node: "node1", @@ -222,7 +223,7 @@ func TestSendMessageFail(t *testing.T) { } func TestSendMessageDestNotFound(t *testing.T) { - ctx, tm, _, done := newTestTransport(t, false, func(mc *mockComponents) { + ctx, tm, _, done := newTestTransport(t, false, func(mc *mockComponents, conf *pldconf.TransportManagerConfig) { mc.registryManager.On("GetNodeTransports", mock.Anything, "node2").Return(nil, fmt.Errorf("not found")) }) defer done() @@ -235,7 +236,7 @@ func TestSendMessageDestNotFound(t *testing.T) { } func TestSendMessageDestNotAvailable(t *testing.T) { - ctx, tm, tp, done := newTestTransport(t, false, func(mc *mockComponents) { + ctx, tm, tp, done := newTestTransport(t, false, func(mc *mockComponents, conf *pldconf.TransportManagerConfig) { mc.registryManager.On("GetNodeTransports", mock.Anything, "node2").Return([]*components.RegistryNodeTransportEntry{ { Node: "node1", @@ -264,7 +265,7 @@ func TestSendMessageDestNotAvailable(t *testing.T) { } func TestGetTransportDetailsOk(t *testing.T) { - ctx, _, tp, done := newTestTransport(t, false, func(mc *mockComponents) { + ctx, _, tp, done := newTestTransport(t, false, func(mc *mockComponents, conf *pldconf.TransportManagerConfig) { mc.registryManager.On("GetNodeTransports", mock.Anything, "node2").Return([]*components.RegistryNodeTransportEntry{ { Node: "node1", @@ -314,7 +315,7 @@ func TestSendInvalidMessageNoPayload(t *testing.T) { func TestReceiveMessage(t *testing.T) { receivedMessages := make(chan *prototk.PaladinMsg, 1) - 
ctx, _, tp, done := newTestTransport(t, false, func(mc *mockComponents) { + ctx, _, tp, done := newTestTransport(t, false, func(mc *mockComponents, conf *pldconf.TransportManagerConfig) { mc.privateTxManager.On("HandlePaladinMsg", mock.Anything, mock.Anything).Return().Run(func(args mock.Arguments) { receivedMessages <- args[1].(*prototk.PaladinMsg) }) @@ -458,7 +459,7 @@ func TestSendContextClosed(t *testing.T) { func TestSendReliableOk(t *testing.T) { ctx, tm, tp, done := newTestTransport(t, false, mockGoodTransport, - func(mc *mockComponents) { + func(mc *mockComponents, conf *pldconf.TransportManagerConfig) { mc.db.Mock.ExpectExec("INSERT.*reliable_msgs").WillReturnResult(driver.ResultNoRows) }, ) @@ -478,7 +479,7 @@ func TestSendReliableOk(t *testing.T) { func TestSendReliableFail(t *testing.T) { ctx, tm, tp, done := newTestTransport(t, false, mockGoodTransport, - func(mc *mockComponents) { + func(mc *mockComponents, conf *pldconf.TransportManagerConfig) { mc.db.Mock.ExpectExec("INSERT.*reliable_msgs").WillReturnError(fmt.Errorf("pop")) }, ) From 8bdfed80ebf0d6043bde9a054e308faf743e2e0c Mon Sep 17 00:00:00 2001 From: Peter Broadhurst Date: Thu, 2 Jan 2025 21:09:27 -0500 Subject: [PATCH 18/41] Work through DB test of state receipt and ack return Signed-off-by: Peter Broadhurst --- core/go/internal/transportmgr/manager.go | 7 +- ...{msghandler.go => reliable_msg_handler.go} | 50 +++++++++--- .../transportmgr/reliable_msg_handler_test.go | 78 +++++++++++++++++++ core/go/internal/transportmgr/transport.go | 1 + .../internal/transportmgr/transport_test.go | 30 ++++++- 5 files changed, 152 insertions(+), 14 deletions(-) rename core/go/internal/transportmgr/{msghandler.go => reliable_msg_handler.go} (71%) create mode 100644 core/go/internal/transportmgr/reliable_msg_handler_test.go diff --git a/core/go/internal/transportmgr/manager.go b/core/go/internal/transportmgr/manager.go index 8d8d8d80b..47345b8a4 100644 --- a/core/go/internal/transportmgr/manager.go +++ b/core/go/internal/transportmgr/manager.go @@ -91,8 +91,6 @@ func NewTransportManager(bgCtx context.Context, conf *pldconf.TransportManagerCo reliableMessagePageSize: 100, // not currently tunable } tm.bgCtx, tm.cancelCtx = context.WithCancel(bgCtx) - tm.reliableMsgWriter = flushwriter.NewWriter(bgCtx, tm.handleReliableMsgBatch, tm.persistence, - &conf.ReliableMessageWriter, &pldconf.TransportManagerDefaults.ReliableMessageWriter) return tm } @@ -116,11 +114,14 @@ func (tm *transportManager) PostInit(c components.AllComponents) error { tm.privateTxManager = c.PrivateTxManager() tm.identityResolver = c.IdentityResolver() tm.persistence = c.Persistence() + tm.reliableMsgWriter = flushwriter.NewWriter(tm.bgCtx, tm.handleReliableMsgBatch, tm.persistence, + &tm.conf.ReliableMessageWriter, &pldconf.TransportManagerDefaults.ReliableMessageWriter) return nil } func (tm *transportManager) Start() error { tm.peerReaperDone = make(chan struct{}) + tm.reliableMsgWriter.Start() go tm.peerReaper() return nil } @@ -147,6 +148,8 @@ func (tm *transportManager) Stop() { <-tm.peerReaperDone } + tm.reliableMsgWriter.Shutdown() + } func (tm *transportManager) cleanupTransport(t *transport) { diff --git a/core/go/internal/transportmgr/msghandler.go b/core/go/internal/transportmgr/reliable_msg_handler.go similarity index 71% rename from core/go/internal/transportmgr/msghandler.go rename to core/go/internal/transportmgr/reliable_msg_handler.go index 004b6a196..610687683 100644 --- a/core/go/internal/transportmgr/msghandler.go +++ 
b/core/go/internal/transportmgr/reliable_msg_handler.go @@ -52,34 +52,35 @@ type noResult struct{} type ackInfo struct { node string id uuid.UUID // sent in CID on wire - Error error `json:"error"` + Error error `json:"error,omitempty"` } -// p, err := tm.getPeer(ctx, v.node, false) -// if err != nil { -// log.L(ctx).Errorf("Discarding message from invalid peer '%s': %s", v.node, err) -// continue -// } -// p.updateReceivedStats(v.msg) +type stateAndAck struct { + state *components.StateUpsertOutsideContext + ack *ackInfo +} func (tm *transportManager) handleReliableMsgBatch(ctx context.Context, dbTX *gorm.DB, values []*reliableMsgOp) (func(error), []flushwriter.Result[*noResult], error) { var acksToWrite []*components.ReliableMessageAck var acksToSend []*ackInfo - var statesToAdd []*components.StateUpsertOutsideContext + statesToAdd := make(map[string][]*stateAndAck) + // The batch can contain different kinds of message that all need persistence activity for _, v := range values { switch v.msg.MessageType { case RMHMessageTypeStateDistribution: - _, stateToAdd, err := parseStateDistribution(ctx, v.msgID, v.msg.Payload) + sd, stateToAdd, err := parseStateDistribution(ctx, v.msgID, v.msg.Payload) if err != nil { acksToSend = append(acksToSend, &ackInfo{node: v.p.Name, id: v.msgID, Error: err}, // reject the message permanently ) } else { - statesToAdd = append(statesToAdd, stateToAdd) - acksToSend = append(acksToSend, &ackInfo{node: v.p.Name, id: v.msgID}) + statesToAdd[sd.Domain] = append(statesToAdd[sd.Domain], &stateAndAck{ + state: stateToAdd, + ack: &ackInfo{node: v.p.Name, id: v.msgID}, + }) } case RMHMessageTypeAck, RMHMessageTypeNack: ackNackToWrite := tm.parseReceivedAckNack(ctx, v.msg) @@ -94,12 +95,39 @@ func (tm *transportManager) handleReliableMsgBatch(ctx context.Context, dbTX *go } } + // Inserting the states is a performance critical activity that we ensure we batch as efficiently as possible, + // while protecting ourselves from inserting states that we haven't done the local validation of. 
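+	// For example (illustrative numbers, not from this patch): a flush batch carrying 40 state
+	// distributions spread across "domain1" and "domain2" results in just two WriteReceivedStates
+	// calls below - one per domain - rather than 40 individual inserts.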
+	for domain, states := range statesToAdd {
+		batchStates := make([]*components.StateUpsertOutsideContext, len(states))
+		for i, s := range states {
+			batchStates[i] = s.state
+		}
+		_, batchErr := tm.stateManager.WriteReceivedStates(ctx, dbTX, domain, batchStates)
+		if batchErr != nil {
+			// We have to try each individually (note if the error was transient in the DB we will rollback
+			// the whole transaction and won't send any acks at all - which is good as sender will retry in that case)
+			log.L(ctx).Errorf("batch insert of %d states for domain %s failed - attempting each individually: %s", len(states), domain, batchErr)
+			for _, s := range states {
+				_, err := tm.stateManager.WriteReceivedStates(ctx, dbTX, domain, []*components.StateUpsertOutsideContext{s.state})
+				if err != nil {
+					log.L(ctx).Errorf("insert state %s from message %s for domain %s failed: %s", s.state.ID, s.ack.id, domain, err)
+					s.ack.Error = err
+				}
+			}
+		}
+		for _, s := range states {
+			acksToSend = append(acksToSend, s.ack)
+		}
+	}
+
+	// Inserting the acks we receive over the wire is a simple activity
 	if len(acksToWrite) > 0 {
 		if err := tm.writeAcks(ctx, dbTX, acksToWrite...); err != nil {
 			return nil, nil, err
 		}
 	}
 
+	// We use a post-commit handler to send back any acks to the other side that are required
 	return func(err error) {
 		if err == nil {
 			// We've committed the database work ok - send the acks/nacks to the other side
diff --git a/core/go/internal/transportmgr/reliable_msg_handler_test.go b/core/go/internal/transportmgr/reliable_msg_handler_test.go
new file mode 100644
index 000000000..61cd29c19
--- /dev/null
+++ b/core/go/internal/transportmgr/reliable_msg_handler_test.go
@@ -0,0 +1,78 @@
+/*
+ * Copyright © 2024 Kaleido, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package transportmgr
+
+import (
+	"context"
+	"testing"
+
+	"github.com/google/uuid"
+	"github.com/kaleido-io/paladin/config/pkg/confutil"
+	"github.com/kaleido-io/paladin/config/pkg/pldconf"
+	"github.com/kaleido-io/paladin/core/internal/components"
+	"github.com/kaleido-io/paladin/toolkit/pkg/prototk"
+	"github.com/kaleido-io/paladin/toolkit/pkg/tktypes"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/mock"
+	"github.com/stretchr/testify/require"
+)
+
+func TestReceiveMessageStateWithAckRealDB(t *testing.T) {
+	ctx, _, tp, done := newTestTransport(t, true,
+		mockGoodTransport,
+		func(mc *mockComponents, conf *pldconf.TransportManagerConfig) {
+			mc.stateManager.On("WriteReceivedStates", mock.Anything, mock.Anything, "domain1", mock.Anything).
+ Return(nil, nil).Once() + }, + ) + defer done() + + msgID := uuid.New() + msg := &prototk.PaladinMsg{ + MessageId: msgID.String(), + CorrelationId: confutil.P(uuid.NewString()), + Component: prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER, + MessageType: RMHMessageTypeStateDistribution, + Payload: tktypes.JSONString(&components.StateDistributionWithData{ + StateDistribution: components.StateDistribution{ + Domain: "domain1", + ContractAddress: tktypes.RandAddress().String(), + SchemaID: tktypes.RandHex(32), + StateID: tktypes.RandHex(32), + }, + StateData: []byte(`{"some":"data"}`), + }), + } + + mockActivateDeactivateOk(tp) + sentMessages := make(chan *prototk.PaladinMsg) + tp.Functions.SendMessage = func(ctx context.Context, req *prototk.SendMessageRequest) (*prototk.SendMessageResponse, error) { + sent := req.Message + sentMessages <- sent + return nil, nil + } + + rmr, err := tp.t.ReceiveMessage(ctx, &prototk.ReceiveMessageRequest{ + FromNode: "node2", + Message: msg, + }) + require.NoError(t, err) + assert.NotNil(t, rmr) + + ack := <-sentMessages + require.JSONEq(t, string(ack.Payload), `{}`) + require.Equal(t, msgID.String(), *ack.CorrelationId) +} diff --git a/core/go/internal/transportmgr/transport.go b/core/go/internal/transportmgr/transport.go index 41327796f..2e031a389 100644 --- a/core/go/internal/transportmgr/transport.go +++ b/core/go/internal/transportmgr/transport.go @@ -155,6 +155,7 @@ func (t *transport) ReceiveMessage(ctx context.Context, req *prototk.ReceiveMess if err != nil { return nil, err } + p.updateReceivedStats(msg) log.L(ctx).Debugf("transport %s message received from %s id=%s (cid=%s)", t.name, p.Name, msgID, tktypes.StrOrEmpty(msg.CorrelationId)) if log.IsTraceEnabled() { diff --git a/core/go/internal/transportmgr/transport_test.go b/core/go/internal/transportmgr/transport_test.go index 33046a098..fe4b42465 100644 --- a/core/go/internal/transportmgr/transport_test.go +++ b/core/go/internal/transportmgr/transport_test.go @@ -312,7 +312,7 @@ func TestSendInvalidMessageNoPayload(t *testing.T) { assert.Regexp(t, "PD012000", err) } -func TestReceiveMessage(t *testing.T) { +func TestReceiveMessageTransactionEngine(t *testing.T) { receivedMessages := make(chan *prototk.PaladinMsg, 1) ctx, _, tp, done := newTestTransport(t, false, func(mc *mockComponents, conf *pldconf.TransportManagerConfig) { @@ -340,6 +340,34 @@ func TestReceiveMessage(t *testing.T) { <-receivedMessages } +func TestReceiveMessageIdentityResolver(t *testing.T) { + receivedMessages := make(chan *prototk.PaladinMsg, 1) + + ctx, _, tp, done := newTestTransport(t, false, func(mc *mockComponents, conf *pldconf.TransportManagerConfig) { + mc.identityResolver.On("HandlePaladinMsg", mock.Anything, mock.Anything).Return().Run(func(args mock.Arguments) { + receivedMessages <- args[1].(*prototk.PaladinMsg) + }) + }) + defer done() + + msg := &prototk.PaladinMsg{ + MessageId: uuid.NewString(), + CorrelationId: confutil.P(uuid.NewString()), + Component: prototk.PaladinMsg_IDENTITY_RESOLVER, + MessageType: "myMessageType", + Payload: []byte("some data"), + } + + rmr, err := tp.t.ReceiveMessage(ctx, &prototk.ReceiveMessageRequest{ + FromNode: "node2", + Message: msg, + }) + require.NoError(t, err) + assert.NotNil(t, rmr) + + <-receivedMessages +} + func TestReceiveMessageInvalidComponent(t *testing.T) { ctx, _, tp, done := newTestTransport(t, false) defer done() From d168cbecd54e21f8ee90afe900762635df11db67 Mon Sep 17 00:00:00 2001 From: Peter Broadhurst Date: Thu, 2 Jan 2025 22:03:54 -0500 Subject: [PATCH 19/41] 
Remove quiesce logic now we have simpler reaper based close Signed-off-by: Peter Broadhurst --- core/go/internal/transportmgr/manager.go | 25 +--- core/go/internal/transportmgr/peer.go | 121 +++++++++--------- core/go/internal/transportmgr/peer_test.go | 89 +------------ .../transportmgr/reliable_msg_handler.go | 55 ++++++-- .../transportmgr/reliable_msg_handler_test.go | 4 +- .../internal/transportmgr/transport_test.go | 3 + 6 files changed, 110 insertions(+), 187 deletions(-) diff --git a/core/go/internal/transportmgr/manager.go b/core/go/internal/transportmgr/manager.go index 47345b8a4..553e40435 100644 --- a/core/go/internal/transportmgr/manager.go +++ b/core/go/internal/transportmgr/manager.go @@ -130,7 +130,7 @@ func (tm *transportManager) Stop() { peers := tm.listActivePeers() for _, p := range peers { - p.close() + tm.reapPeer(p) } tm.mux.Lock() @@ -305,29 +305,6 @@ func (tm *transportManager) SendReliable(ctx context.Context, dbTX *gorm.DB, msg } -func (tm *transportManager) peerReaper() { - defer close(tm.peerReaperDone) - - for { - select { - case <-tm.bgCtx.Done(): - log.L(tm.bgCtx).Debugf("peer reaper exiting") - return - case <-time.After(tm.peerReaperInterval): - } - - candidates := tm.listActivePeers() - var reaped []*peer - for _, p := range candidates { - if p.isInactive() { - reaped = append(reaped, p) - p.close() - } - } - log.L(tm.bgCtx).Debugf("peer reaper before=%d reaped=%d", len(candidates), len(reaped)) - } -} - func (tm *transportManager) writeAcks(ctx context.Context, dbTX *gorm.DB, acks ...*components.ReliableMessageAck) error { for _, ack := range acks { log.L(ctx).Infof("ack received for message %s", ack.MessageID) diff --git a/core/go/internal/transportmgr/peer.go b/core/go/internal/transportmgr/peer.go index 93a568abb..8ea6b4dcf 100644 --- a/core/go/internal/transportmgr/peer.go +++ b/core/go/internal/transportmgr/peer.go @@ -52,7 +52,6 @@ type peer struct { lastDrainHWM *uint64 persistentMsgsDrained bool - quiescing bool senderStarted atomic.Bool senderDone chan struct{} } @@ -63,6 +62,26 @@ func (p nameSortedPeers) Len() int { return len(p) } func (p nameSortedPeers) Swap(i, j int) { p[i], p[j] = p[j], p[i] } func (p nameSortedPeers) Less(i, j int) bool { return cmp.Less(p[i].Name, p[j].Name) } +func (tm *transportManager) getPeer(ctx context.Context, nodeName string, sending bool) (*peer, error) { + + if err := tktypes.ValidateSafeCharsStartEndAlphaNum(ctx, nodeName, tktypes.DefaultNameMaxLen, "node"); err != nil { + return nil, i18n.WrapError(ctx, err, msgs.MsgTransportInvalidTargetNode, nodeName) + } + if nodeName == tm.localNodeName { + return nil, i18n.NewError(ctx, msgs.MsgTransportSendLocalNode, tm.localNodeName) + } + + // Hopefully this is an already active connection + p := tm.getActivePeer(nodeName) + if p != nil && (p.senderStarted.Load() || !sending) { + // Already active and obtained via read-lock + log.L(ctx).Debugf("connection already active for peer '%s'", nodeName) + return p, nil + } + + return tm.connectPeer(ctx, nodeName, sending) +} + // get a list of all active peers func (tm *transportManager) listActivePeers() nameSortedPeers { tm.peersLock.RLock() @@ -82,24 +101,47 @@ func (tm *transportManager) getActivePeer(nodeName string) *peer { return tm.peers[nodeName] } -func (tm *transportManager) getPeer(ctx context.Context, nodeName string, sending bool) (*peer, error) { +func (tm *transportManager) reapPeer(p *peer) { + p.tm.peersLock.Lock() + defer p.tm.peersLock.Unlock() + delete(p.tm.peers, p.Name) - if err := 
tktypes.ValidateSafeCharsStartEndAlphaNum(ctx, nodeName, tktypes.DefaultNameMaxLen, "node"); err != nil { - return nil, i18n.WrapError(ctx, err, msgs.MsgTransportInvalidTargetNode, nodeName) - } - if nodeName == tm.localNodeName { - return nil, i18n.NewError(ctx, msgs.MsgTransportSendLocalNode, tm.localNodeName) - } + // Close down the peer + log.L(p.ctx).Infof("peer %s deactivating", p.Name) + p.close() - // Hopefully this is an already active connection - p := tm.getActivePeer(nodeName) - if p != nil && (p.senderStarted.Load() || !sending) { - // Already active and obtained via read-lock - log.L(ctx).Debugf("connection already active for peer '%s'", nodeName) - return p, nil + if p.senderStarted.Load() { + // Holding the lock while activating/deactivating ensures we never dual-activate in the transport + if _, err := p.transport.api.DeactivateNode(p.ctx, &prototk.DeactivateNodeRequest{ + NodeName: p.Name, + }); err != nil { + log.L(p.ctx).Warnf("peer %s returned deactivation error: %s", p.Name, err) + } } - return tm.connectPeer(ctx, nodeName, sending) +} + +func (tm *transportManager) peerReaper() { + defer close(tm.peerReaperDone) + + for { + select { + case <-tm.bgCtx.Done(): + log.L(tm.bgCtx).Debugf("peer reaper exiting") + return + case <-time.After(tm.peerReaperInterval): + } + + candidates := tm.listActivePeers() + var reaped []*peer + for _, p := range candidates { + if p.isInactive() { + tm.reapPeer(p) + reaped = append(reaped, p) + } + } + log.L(tm.bgCtx).Debugf("peer reaper before=%d reaped=%d", len(candidates), len(reaped)) + } } func (tm *transportManager) connectPeer(ctx context.Context, nodeName string, sending bool) (*peer, error) { @@ -226,43 +268,6 @@ func (p *peer) updateReceivedStats(msg *prototk.PaladinMsg) { p.Stats.ReceivedBytes += uint64(len(msg.Payload)) } -func (p *peer) senderCleanup() { - p.deactivate() - - // There's a very small window where we might have got delivered a message by a routine - // that got us out of the map before we deactivated. - // In this edge case, we need to spin off the new peer connection to replace us. - for p.quiescing { - select { - case msg := <-p.sendQueue: - log.L(p.ctx).Infof("message delivered in inactivity quiesce window. Re-connecting") - _ = p.tm.queueFireAndForget(p.ctx, p.Name, msg) - case <-p.persistedMsgsAvailable: - log.L(p.ctx).Infof("reliable message delivered in inactivity quiesce window. 
Re-connecting") - _, _ = p.tm.getPeer(p.ctx, p.Name, true) - case <-time.After(p.tm.quiesceTimeout): - p.quiescing = false - } - } - - close(p.senderDone) -} - -func (p *peer) deactivate() { - // Hold the peers write lock to do this - p.tm.peersLock.Lock() - defer p.tm.peersLock.Unlock() - delete(p.tm.peers, p.Name) - - // Holding the lock while activating/deactivating ensures we never dual-activate in the transport - log.L(p.ctx).Infof("peer %s deactivating", p.Name) - if _, err := p.transport.api.DeactivateNode(p.ctx, &prototk.DeactivateNodeRequest{ - NodeName: p.Name, - }); err != nil { - log.L(p.ctx).Warnf("peer %s returned deactivation error: %s", p.Name, err) - } -} - func (p *peer) reliableMessageScan(checkNew bool) error { fullScan := p.lastDrainHWM == nil || time.Since(p.lastFullScan) >= p.tm.reliableMessageResend @@ -434,12 +439,11 @@ func (p *peer) processReliableMsgPage(page []*components.ReliableMessage) (err e } func (p *peer) sender() { - defer p.senderCleanup() + defer close(p.senderDone) log.L(p.ctx).Infof("peer %s active", p.Name) checkNew := false - hitInactivityTimeout := false for { // We send/resend any reliable messages queued up first @@ -449,14 +453,6 @@ func (p *peer) sender() { if err != nil { return // context closed } - - // Depending on our persistent message status, check if we're able to quiesce - if hitInactivityTimeout && p.persistentMsgsDrained && - (p.Stats.LastReceive == nil || time.Since(p.Stats.LastReceive.Time()) > p.tm.peerInactivityTimeout) { - p.quiescing = true - return // quiesce handling is in senderDone() deferred function - } - hitInactivityTimeout = false checkNew = false // Our wait timeout needs to be the shortest of: @@ -467,7 +463,6 @@ func (p *peer) sender() { for processingMsgs { select { case <-resendTimer.C: - hitInactivityTimeout = true processingMsgs = false // spin round and check if we have persisted messages to (re)process case <-p.persistedMsgsAvailable: resendTimer.Stop() diff --git a/core/go/internal/transportmgr/peer_test.go b/core/go/internal/transportmgr/peer_test.go index cc53ae62f..a37bcb799 100644 --- a/core/go/internal/transportmgr/peer_test.go +++ b/core/go/internal/transportmgr/peer_test.go @@ -204,10 +204,12 @@ func TestReliableMessageSendSendQuiesceRealDB(t *testing.T) { // Deliver the two acks p := tm.peers["node2"] for _, msgID := range msgIDs { - err := tm.writeAcks(ctx, tm.persistence.DB(), &components.ReliableMessageAck{ - MessageID: msgID, + rmr, err := tp.t.ReceiveMessage(ctx, &prototk.ReceiveMessageRequest{ + FromNode: "node2", + Message: buildAck(msgID, nil), }) require.NoError(t, err) + assert.NotNil(t, rmr) } // Wait for the peer to end via quiesce @@ -366,89 +368,6 @@ func TestActivateBadPeerInfo(t *testing.T) { } -func TestQuiesceDetectPersistentMessage(t *testing.T) { - - ctx, tm, tp, done := newTestTransport(t, false, mockGoodTransport) - defer done() - - // Load up a notification for a persistent message - tm.reliableMessageResend = 10 * time.Millisecond - tm.peerInactivityTimeout = 1 * time.Second - tm.quiesceTimeout = 1 * time.Millisecond - - mockActivateDeactivateOk(tp) - - quiescingPeer, err := tm.getPeer(ctx, "node2", true) - require.NoError(t, err) - - // Force cancel that peer - quiescingPeer.cancelCtx() - <-quiescingPeer.senderDone - - // Simulate quiescing with persistent messages delivered - quiescingPeer.quiescing = true - quiescingPeer.senderDone = make(chan struct{}) - quiescingPeer.persistedMsgsAvailable = make(chan struct{}, 1) - quiescingPeer.persistedMsgsAvailable <- struct{}{} - - 
// Now in quiesce it will start up a new one - quiescingPeer.senderCleanup() - - require.NotNil(t, tm.peers["node2"]) - -} - -func TestQuiesceDetectFireAndForgetMessage(t *testing.T) { - - ctx, tm, tp, done := newTestTransport(t, false, - mockGoodTransport, - mockEmptyReliableMsgs, - mockEmptyReliableMsgs, - ) - defer done() - - // Load up a notification for a persistent message - tm.reliableMessageResend = 1 * time.Second - tm.peerInactivityTimeout = 1 * time.Second - tm.quiesceTimeout = 1 * time.Millisecond - - mockActivateDeactivateOk(tp) - - quiescingPeer, err := tm.getPeer(ctx, "node2", true) - require.NoError(t, err) - - // Force cancel that peer - quiescingPeer.cancelCtx() - <-quiescingPeer.senderDone - - // Simulate quiescing with persistent messages delivered - quiescingPeer.quiescing = true - quiescingPeer.ctx = ctx - quiescingPeer.senderDone = make(chan struct{}) - quiescingPeer.sendQueue = make(chan *prototk.PaladinMsg, 1) - quiescingPeer.sendQueue <- &prototk.PaladinMsg{ - MessageId: uuid.NewString(), - Component: prototk.PaladinMsg_TRANSACTION_ENGINE, - MessageType: "example", - Payload: []byte(`{}`), - } - - sentMessages := make(chan *prototk.PaladinMsg, 1) - tp.Functions.SendMessage = func(ctx context.Context, req *prototk.SendMessageRequest) (*prototk.SendMessageResponse, error) { - sent := req.Message - assert.NotEmpty(t, sent.MessageId) - sentMessages <- sent - return nil, nil - } - // Now in quiesce it will start up a new one - quiescingPeer.senderCleanup() - - require.NotNil(t, tm.peers["node2"]) - - <-sentMessages - -} - func TestDeactivateFail(t *testing.T) { ctx, tm, tp, done := newTestTransport(t, false, diff --git a/core/go/internal/transportmgr/reliable_msg_handler.go b/core/go/internal/transportmgr/reliable_msg_handler.go index 610687683..34bbb8b2a 100644 --- a/core/go/internal/transportmgr/reliable_msg_handler.go +++ b/core/go/internal/transportmgr/reliable_msg_handler.go @@ -120,11 +120,33 @@ func (tm *transportManager) handleReliableMsgBatch(ctx context.Context, dbTX *go } } - // Inserting the acks we receive over the wire is a simple activity + // We can only store acks for messages that are in our DB (due to foreign key relationship), + // so we have to query them first to validate the acks before attempting insert. 
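+	// For example, an ack that arrives for a message ID missing from reliable_msgs (never sent by
+	// us, or a duplicate/garbled CID) would violate the foreign key on insert - such acks are
+	// dropped from the batch below rather than failing the whole database transaction.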
if len(acksToWrite) > 0 { - if err := tm.writeAcks(ctx, dbTX, acksToWrite...); err != nil { + ackQuery := make([]uuid.UUID, len(acksToWrite)) + for i, a := range acksToWrite { + ackQuery[i] = a.MessageID + } + var matchedMsgs []*components.ReliableMessage + err := dbTX.WithContext(ctx).Select("id").Find(&matchedMsgs).Error + if err != nil { return nil, nil, err } + validatedAcks := make([]*components.ReliableMessageAck, 0, len(acksToWrite)) + for _, a := range acksToWrite { + for _, mm := range matchedMsgs { + if mm.ID == a.MessageID { + log.L(ctx).Infof("Writing ack for message %s", a.MessageID) + validatedAcks = append(validatedAcks, a) + } + } + } + if len(validatedAcks) > 0 { + // Now we're actually ready to insert them + if err := tm.writeAcks(ctx, dbTX, acksToWrite...); err != nil { + return nil, nil, err + } + } } // We use a post-commit handler to send back any acks to the other side that are required @@ -132,24 +154,29 @@ func (tm *transportManager) handleReliableMsgBatch(ctx context.Context, dbTX *go if err == nil { // We've committed the database work ok - send the acks/nacks to the other side for _, a := range acksToSend { - cid := a.id.String() - msgType := RMHMessageTypeAck - if a.Error != nil { - msgType = RMHMessageTypeNack - } - _ = tm.queueFireAndForget(ctx, a.node, &prototk.PaladinMsg{ - MessageId: uuid.NewString(), - Component: prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER, - MessageType: msgType, - CorrelationId: &cid, - Payload: tktypes.JSONString(a), - }) + + _ = tm.queueFireAndForget(ctx, a.node, buildAck(a.id, a.Error)) } } }, make([]flushwriter.Result[*noResult], len(values)), nil } +func buildAck(msgID uuid.UUID, err error) *prototk.PaladinMsg { + cid := msgID.String() + msgType := RMHMessageTypeAck + if err != nil { + msgType = RMHMessageTypeNack + } + return &prototk.PaladinMsg{ + MessageId: uuid.NewString(), + Component: prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER, + MessageType: msgType, + CorrelationId: &cid, + Payload: tktypes.JSONString(&ackInfo{Error: err}), + } +} + func (tm *transportManager) parseReceivedAckNack(ctx context.Context, msg *prototk.PaladinMsg) *components.ReliableMessageAck { var info ackInfo var cid uuid.UUID diff --git a/core/go/internal/transportmgr/reliable_msg_handler_test.go b/core/go/internal/transportmgr/reliable_msg_handler_test.go index 61cd29c19..1772e6f02 100644 --- a/core/go/internal/transportmgr/reliable_msg_handler_test.go +++ b/core/go/internal/transportmgr/reliable_msg_handler_test.go @@ -30,7 +30,7 @@ import ( "github.com/stretchr/testify/require" ) -func TestReceiveMessageStateWithAckRealDB(t *testing.T) { +func TestReceiveMessageStateSendAckRealDB(t *testing.T) { ctx, _, tp, done := newTestTransport(t, true, mockGoodTransport, func(mc *mockComponents, conf *pldconf.TransportManagerConfig) { @@ -65,6 +65,7 @@ func TestReceiveMessageStateWithAckRealDB(t *testing.T) { return nil, nil } + // Receive the message that needs the ack rmr, err := tp.t.ReceiveMessage(ctx, &prototk.ReceiveMessageRequest{ FromNode: "node2", Message: msg, @@ -75,4 +76,5 @@ func TestReceiveMessageStateWithAckRealDB(t *testing.T) { ack := <-sentMessages require.JSONEq(t, string(ack.Payload), `{}`) require.Equal(t, msgID.String(), *ack.CorrelationId) + } diff --git a/core/go/internal/transportmgr/transport_test.go b/core/go/internal/transportmgr/transport_test.go index fe4b42465..a6b10e355 100644 --- a/core/go/internal/transportmgr/transport_test.go +++ b/core/go/internal/transportmgr/transport_test.go @@ -61,6 +61,9 @@ func newTestTransport(t 
 				Config: map[string]any{"some": "conf"},
 			},
 		},
+		ReliableMessageWriter: pldconf.FlushWriterConfig{
+			BatchMaxSize: confutil.P(1),
+		},
 	}
 
 	ctx, tm, _, done := newTestTransportManager(t, realDB, conf, extraSetup...)

From a2123a3ba1da072513b07266b0b937be2c53c667 Mon Sep 17 00:00:00 2001
From: Peter Broadhurst
Date: Fri, 3 Jan 2025 10:22:47 -0500
Subject: [PATCH 20/41] Close out on receive/ack logic

Signed-off-by: Peter Broadhurst
---
 .../transportmgr/reliable_msg_handler.go      |  22 +-
 .../transportmgr/reliable_msg_handler_test.go | 239 ++++++++++++++++++
 .../internal/transportmgr/transport_test.go   |  19 ++
 3 files changed, 270 insertions(+), 10 deletions(-)

diff --git a/core/go/internal/transportmgr/reliable_msg_handler.go b/core/go/internal/transportmgr/reliable_msg_handler.go
index 34bbb8b2a..51a57aaee 100644
--- a/core/go/internal/transportmgr/reliable_msg_handler.go
+++ b/core/go/internal/transportmgr/reliable_msg_handler.go
@@ -18,6 +18,7 @@ package transportmgr
 import (
 	"context"
 	"encoding/json"
+	"errors"
 
 	"github.com/google/uuid"
 	"github.com/hyperledger/firefly-common/pkg/i18n"
@@ -52,7 +53,7 @@ type noResult struct{}
 type ackInfo struct {
 	node  string
 	id    uuid.UUID // sent in CID on wire
-	Error error  `json:"error,omitempty"`
+	Error string `json:"error,omitempty"`
 }
 
 type stateAndAck struct {
@@ -74,7 +75,7 @@ func (tm *transportManager) handleReliableMsgBatch(ctx context.Context, dbTX *go
 			sd, stateToAdd, err := parseStateDistribution(ctx, v.msgID, v.msg.Payload)
 			if err != nil {
 				acksToSend = append(acksToSend,
-					&ackInfo{node: v.p.Name, id: v.msgID, Error: err}, // reject the message permanently
+					&ackInfo{node: v.p.Name, id: v.msgID, Error: err.Error()}, // reject the message permanently
 				)
 			} else {
 				statesToAdd[sd.Domain] = append(statesToAdd[sd.Domain], &stateAndAck{
@@ -90,7 +91,7 @@ func (tm *transportManager) handleReliableMsgBatch(ctx context.Context, dbTX *go
 		default:
 			err := i18n.NewError(ctx, msgs.MsgTransportUnsupportedReliableMsgType, v.msg.MessageType)
 			acksToSend = append(acksToSend,
-				&ackInfo{node: v.p.Name, id: v.msgID, Error: err}, // reject the message permanently
+				&ackInfo{node: v.p.Name, id: v.msgID, Error: err.Error()}, // reject the message permanently
 			)
 		}
 	}
@@ -111,7 +112,7 @@ func (tm *transportManager) handleReliableMsgBatch(ctx context.Context, dbTX *go
 				_, err := tm.stateManager.WriteReceivedStates(ctx, dbTX, domain, []*components.StateUpsertOutsideContext{s.state})
 				if err != nil {
 					log.L(ctx).Errorf("insert state %s from message %s for domain %s failed - attempting each individually: %s", s.state.ID, s.ack.id, domain, batchErr)
-					s.ack.Error = err
+					s.ack.Error = err.Error()
 				}
 			}
 		}
@@ -154,8 +155,7 @@ func (tm *transportManager) handleReliableMsgBatch(ctx context.Context, dbTX *go
 		if err == nil {
 			// We've committed the database work ok - send the acks/nacks to the other side
 			for _, a := range acksToSend {
-
-				_ = tm.queueFireAndForget(ctx, a.node, buildAck(a.id, a.Error))
+				var ackErr error
+				if a.Error != "" {
+					ackErr = errors.New(a.Error)
+				}
+				_ = tm.queueFireAndForget(ctx, a.node, buildAck(a.id, ackErr))
 			}
 		}
 	}, make([]flushwriter.Result[*noResult], len(values)), nil
@@ -165,15 +165,17 @@ func buildAck(msgID uuid.UUID, err error) *prototk.PaladinMsg {
 	cid := msgID.String()
 	msgType := RMHMessageTypeAck
+	var errString string
 	if err != nil {
 		msgType = RMHMessageTypeNack
+		errString = err.Error()
 	}
 	return &prototk.PaladinMsg{
 		MessageId:     uuid.NewString(),
 		Component:     prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER,
 		MessageType:   msgType,
 		CorrelationId: &cid,
-		Payload:       tktypes.JSONString(&ackInfo{Error: err}),
+		Payload:       tktypes.JSONString(&ackInfo{Error: errString}),
 	}
 }
@@ -196,10 +198,10 @@ func (tm *transportManager) parseReceivedAckNack(ctx context.Context, msg *proto
 		Time: tktypes.TimestampNow(),
 	}
 	if msg.MessageType == RMHMessageTypeNack {
-		if info.Error == nil {
-			info.Error = i18n.NewError(ctx, msgs.MsgTransportNackMissingError)
+		if info.Error == "" {
+			info.Error = i18n.NewError(ctx, msgs.MsgTransportNackMissingError).Error()
 		}
-		ackNackToWrite.Error = info.Error.Error()
+		ackNackToWrite.Error = info.Error
 	}
 	return ackNackToWrite
 }
diff --git a/core/go/internal/transportmgr/reliable_msg_handler_test.go b/core/go/internal/transportmgr/reliable_msg_handler_test.go
index 1772e6f02..d3e272323 100644
--- a/core/go/internal/transportmgr/reliable_msg_handler_test.go
+++ b/core/go/internal/transportmgr/reliable_msg_handler_test.go
@@ -17,8 +17,11 @@ package transportmgr
 import (
 	"context"
+	"encoding/json"
+	"fmt"
 	"testing"
 
+	"github.com/DATA-DOG/go-sqlmock"
 	"github.com/google/uuid"
 	"github.com/kaleido-io/paladin/config/pkg/confutil"
 	"github.com/kaleido-io/paladin/config/pkg/pldconf"
@@ -78,3 +81,239 @@ func TestReceiveMessageStateSendAckRealDB(t *testing.T) {
 	require.Equal(t, msgID.String(), *ack.CorrelationId)
 
 }
+
+func TestHandleStateDistroBadState(t *testing.T) {
+	ctx, tm, tp, done := newTestTransport(t, false,
+		mockGoodTransport,
+		mockEmptyReliableMsgs,
+		func(mc *mockComponents, conf *pldconf.TransportManagerConfig) {
+			mc.stateManager.On("WriteReceivedStates", mock.Anything, mock.Anything, "domain1", mock.Anything).
+				Return(nil, fmt.Errorf("bad data")).Twice()
+		},
+	)
+	defer done()
+
+	msgID := uuid.New()
+	msg := &prototk.PaladinMsg{
+		MessageId:     msgID.String(),
+		CorrelationId: confutil.P(uuid.NewString()),
+		Component:     prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER,
+		MessageType:   RMHMessageTypeStateDistribution,
+		Payload: tktypes.JSONString(&components.StateDistributionWithData{
+			StateDistribution: components.StateDistribution{
+				Domain:          "domain1",
+				ContractAddress: tktypes.RandAddress().String(),
+				SchemaID:        tktypes.RandHex(32),
+				StateID:         tktypes.RandHex(32),
+			},
+			StateData: []byte(`{"some":"data"}`),
+		}),
+	}
+
+	mockActivateDeactivateOk(tp)
+	sentMessages := make(chan *prototk.PaladinMsg)
+	tp.Functions.SendMessage = func(ctx context.Context, req *prototk.SendMessageRequest) (*prototk.SendMessageResponse, error) {
+		sent := req.Message
+		sentMessages <- sent
+		return nil, nil
+	}
+
+	p, err := tm.getPeer(ctx, "node2", false)
+	require.NoError(t, err)
+
+	// Handle the batch - will fail to write the states
+	postCommit, _, err := tm.handleReliableMsgBatch(ctx, tm.persistence.DB(), []*reliableMsgOp{
+		{msgID: msgID, p: p, msg: msg},
+	})
+	require.NoError(t, err)
+
+	// Run the postCommit and check we get the nack
+	postCommit(nil)
+
+	expectedNack := <-sentMessages
+	require.Equal(t, msgID.String(), *expectedNack.CorrelationId)
+	require.Equal(t, prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER, expectedNack.Component)
+	require.Equal(t, RMHMessageTypeNack, expectedNack.MessageType)
+	var ai ackInfo
+	err = json.Unmarshal(expectedNack.Payload, &ai)
+	require.NoError(t, err)
+	require.Regexp(t, "bad data", ai.Error)
+}
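+
+// For reference across the nack tests below: buildAck serializes ackInfo to
+// JSON, so a nack payload on the wire looks like (error text illustrative):
+//   {"error":"bad data"}
+// while a plain ack omits the field entirely, leaving the empty object {}.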
+
+func TestHandleStateDistroBadMsg(t *testing.T) {
+	ctx, tm, tp, done := newTestTransport(t, false,
+		mockGoodTransport,
+		mockEmptyReliableMsgs,
+	)
+	defer done()
+
+	msgID := uuid.New()
+	msg := &prototk.PaladinMsg{
+		MessageId:     msgID.String(),
+		CorrelationId: confutil.P(uuid.NewString()),
+		Component:     prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER,
+		MessageType:   RMHMessageTypeStateDistribution,
+		Payload: tktypes.JSONString(&components.StateDistributionWithData{
+			StateDistribution: components.StateDistribution{
+				Domain:          "domain1",
+				ContractAddress: tktypes.RandAddress().String(),
+				SchemaID:        "wrongness",
+				StateID:         tktypes.RandHex(32),
+			},
+			StateData: []byte(`{"some":"data"}`),
+		}),
+	}
+
+	mockActivateDeactivateOk(tp)
+	sentMessages := make(chan *prototk.PaladinMsg)
+	tp.Functions.SendMessage = func(ctx context.Context, req *prototk.SendMessageRequest) (*prototk.SendMessageResponse, error) {
+		sent := req.Message
+		sentMessages <- sent
+		return nil, nil
+	}
+
+	p, err := tm.getPeer(ctx, "node2", false)
+	require.NoError(t, err)
+
+	// Handle the batch - will fail to parse the state distribution payload
+	postCommit, _, err := tm.handleReliableMsgBatch(ctx, tm.persistence.DB(), []*reliableMsgOp{
+		{msgID: msgID, p: p, msg: msg},
+	})
+	require.NoError(t, err)
+
+	// Run the postCommit and check we get the nack
+	postCommit(nil)
+
+	expectedNack := <-sentMessages
+	require.Equal(t, msgID.String(), *expectedNack.CorrelationId)
+	require.Equal(t, prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER, expectedNack.Component)
+	require.Equal(t, RMHMessageTypeNack, expectedNack.MessageType)
+	var ai ackInfo
+	err = json.Unmarshal(expectedNack.Payload, &ai)
+	require.NoError(t, err)
+	require.Regexp(t, "PD012016", ai.Error)
+}
+
+func TestHandleStateDistroUnknownMsgType(t *testing.T) {
+	ctx, tm, tp, done := newTestTransport(t, false,
+		mockGoodTransport,
+		mockEmptyReliableMsgs,
+	)
+	defer done()
+
+	msgID := uuid.New()
+	msg := &prototk.PaladinMsg{
+		MessageId:     msgID.String(),
+		CorrelationId: confutil.P(uuid.NewString()),
+		Component:     prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER,
+		MessageType:   "unknown",
+		Payload:       []byte(`{}`),
+	}
+
+	mockActivateDeactivateOk(tp)
+	sentMessages := make(chan *prototk.PaladinMsg)
+	tp.Functions.SendMessage = func(ctx context.Context, req *prototk.SendMessageRequest) (*prototk.SendMessageResponse, error) {
+		sent := req.Message
+		sentMessages <- sent
+		return nil, nil
+	}
+
+	p, err := tm.getPeer(ctx, "node2", false)
+	require.NoError(t, err)
+
+	// Handle the batch - the message type is unsupported, so it is nacked
+	postCommit, _, err := tm.handleReliableMsgBatch(ctx, tm.persistence.DB(), []*reliableMsgOp{
+		{msgID: msgID, p: p, msg: msg},
+	})
+	require.NoError(t, err)
+
+	// Run the postCommit and check we get the nack
+	postCommit(nil)
+
+	expectedNack := <-sentMessages
+	require.Equal(t, msgID.String(), *expectedNack.CorrelationId)
+	require.Equal(t, prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER, expectedNack.Component)
+	require.Equal(t, RMHMessageTypeNack, expectedNack.MessageType)
+	var ai ackInfo
+	err = json.Unmarshal(expectedNack.Payload, &ai)
+	require.NoError(t, err)
+	require.Regexp(t, "PD012017", ai.Error)
+}
+
+func TestHandleAckFailReadMsg(t *testing.T) {
+	ctx, tm, _, done := newTestTransport(t, false, func(mc *mockComponents, conf *pldconf.TransportManagerConfig) {
+		mc.db.Mock.ExpectQuery("SELECT.*reliable_msgs").WillReturnError(fmt.Errorf("pop"))
+	})
+	defer done()
+
+	msgID := uuid.New()
+	msg := &prototk.PaladinMsg{
+		MessageId:     msgID.String(),
+		CorrelationId: confutil.P(uuid.NewString()),
+		Component:     prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER,
+		MessageType:   RMHMessageTypeAck,
+		Payload:       []byte(`{}`),
+	}
+
+	p, err := tm.getPeer(ctx, "node2", false)
+	require.NoError(t, err)
+
+	// Handle the batch - will fail to read the reliable_msgs table to validate the ack
+	_, _, err = tm.handleReliableMsgBatch(ctx, tm.persistence.DB(), []*reliableMsgOp{
+		{msgID: msgID, p: p, msg: msg},
+	})
+	require.Regexp(t, "pop", err)
+
+}
+
+func TestHandleNackFailWriteAck(t *testing.T) {
+	msgID := uuid.New()
+
+	ctx, tm, _, done := newTestTransport(t, false, func(mc *mockComponents, conf *pldconf.TransportManagerConfig) {
+		mc.db.Mock.ExpectQuery("SELECT.*reliable_msgs").WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow(msgID.String()))
+		mc.db.Mock.ExpectExec("INSERT.*reliable_msg_acks").WillReturnError(fmt.Errorf("pop"))
+	})
+	defer done()
+
+	msg := &prototk.PaladinMsg{
+		MessageId:     uuid.NewString(),
+		CorrelationId: confutil.P(msgID.String()),
+		Component:     prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER,
+		MessageType:   RMHMessageTypeNack,
+		Payload:       []byte(`{}`),
+	}
+
+	p, err := tm.getPeer(ctx, "node2", false)
+	require.NoError(t, err)
+
+	// Handle the batch - will fail to write the ack for the nack
+	_, _, err = tm.handleReliableMsgBatch(ctx, tm.persistence.DB(), []*reliableMsgOp{
+		{msgID: msgID, p: p, msg: msg},
+	})
+	require.Regexp(t, "pop", err)
+
+}
+
+func TestHandleBadAckNoCorrelId(t *testing.T) {
+	msgID := uuid.New()
+
+	ctx, tm, _, done := newTestTransport(t, false)
+	defer done()
+
+	msg := &prototk.PaladinMsg{
+		MessageId:   uuid.NewString(),
+		Component:   prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER,
+		MessageType: RMHMessageTypeAck,
+		Payload:     []byte(`{}`),
+	}
+
+	p, err := tm.getPeer(ctx, "node2", false)
+	require.NoError(t, err)
+
+	// Handle the batch - the ack has no correlation ID, so there is nothing to write
+	postCommit, _, err := tm.handleReliableMsgBatch(ctx, tm.persistence.DB(), []*reliableMsgOp{
+		{msgID: msgID, p: p, msg: msg},
+	})
+	require.NoError(t, err)
+	postCommit(nil)
+}
diff --git a/core/go/internal/transportmgr/transport_test.go b/core/go/internal/transportmgr/transport_test.go
index a6b10e355..d3dc86938 100644
--- a/core/go/internal/transportmgr/transport_test.go
+++ b/core/go/internal/transportmgr/transport_test.go
@@ -390,6 +390,25 @@ func TestReceiveMessageInvalidComponent(t *testing.T) {
 	require.Regexp(t, "PD012011", err)
 }
 
+func TestReceiveMessageInvalidNode(t *testing.T) {
+	ctx, _, tp, done := newTestTransport(t, false)
+	defer done()
+
+	msg := &prototk.PaladinMsg{
+		MessageId:     uuid.NewString(),
+		CorrelationId: confutil.P(uuid.NewString()),
+		Component:     prototk.PaladinMsg_Component(42),
+		MessageType:   "myMessageType",
+		Payload:       []byte("some data"),
+	}
+
+	_, err := tp.t.ReceiveMessage(ctx, &prototk.ReceiveMessageRequest{
+		FromNode: ".wrong",
+		Message:  msg,
+	})
+	require.Regexp(t, "PD012015", err)
+}
+
 func TestReceiveMessageNotInit(t *testing.T) {
 	ctx, _, tp, done := newTestTransport(t, false)
 	defer done()

From 6fdfc2d368871fd0f91eb11738d96862088449d8 Mon Sep 17 00:00:00 2001
From: Peter Broadhurst
Date: Fri, 3 Jan 2025 10:31:16 -0500
Subject: [PATCH 21/41] Add RPC interface for query

Signed-off-by: Peter Broadhurst
---
 core/go/internal/transportmgr/peer.go         | 17 +++++++++++
 .../internal/transportmgr/transportmgr_rpc.go | 17 ++++++++++-
 .../transportmgr/transportmgr_rpc_test.go     | 30 +++++++++++++++----
 3 files changed, 57 insertions(+), 7 deletions(-)

diff --git a/core/go/internal/transportmgr/peer.go b/core/go/internal/transportmgr/peer.go
index 8ea6b4dcf..e14187fc1 100644
--- a/core/go/internal/transportmgr/peer.go
+++ b/core/go/internal/transportmgr/peer.go
@@ -94,6 +94,23 @@ func (tm *transportManager) listActivePeers() nameSortedPeers {
 	return peers
 }
 
+func (tm *transportManager)
listActivePeerInfo() []*pldapi.PeerInfo { + peers := tm.listActivePeers() + peerInfo := make([]*pldapi.PeerInfo, len(peers)) + for i, p := range peers { + peerInfo[i] = &p.PeerInfo + } + return peerInfo +} + +func (tm *transportManager) getPeerInfo(nodeName string) *pldapi.PeerInfo { + peer := tm.getActivePeer(nodeName) + if peer == nil { + return nil + } + return &peer.PeerInfo +} + // efficient read-locked call to get an active peer connection func (tm *transportManager) getActivePeer(nodeName string) *peer { tm.peersLock.RLock() diff --git a/core/go/internal/transportmgr/transportmgr_rpc.go b/core/go/internal/transportmgr/transportmgr_rpc.go index 57894c203..6df65e3ad 100644 --- a/core/go/internal/transportmgr/transportmgr_rpc.go +++ b/core/go/internal/transportmgr/transportmgr_rpc.go @@ -19,6 +19,7 @@ package transportmgr import ( "context" + "github.com/kaleido-io/paladin/toolkit/pkg/pldapi" "github.com/kaleido-io/paladin/toolkit/pkg/rpcserver" ) @@ -30,7 +31,9 @@ func (tm *transportManager) initRPC() { tm.rpcModule = rpcserver.NewRPCModule("transport"). Add("transport_nodeName", tm.rpcNodeName()). Add("transport_localTransports", tm.rpcLocalTransports()). - Add("transport_localTransportDetails", tm.rpcLocalTransportDetails()) + Add("transport_localTransportDetails", tm.rpcLocalTransportDetails()). + Add("transport_peers", tm.rpcPeers()). + Add("transport_peerInfo", tm.rpcPeerInfo()) } func (tm *transportManager) rpcNodeName() rpcserver.RPCHandler { @@ -54,3 +57,15 @@ func (tm *transportManager) rpcLocalTransportDetails() rpcserver.RPCHandler { return tm.getLocalTransportDetails(ctx, transportName) }) } + +func (tm *transportManager) rpcPeers() rpcserver.RPCHandler { + return rpcserver.RPCMethod0(func(ctx context.Context) ([]*pldapi.PeerInfo, error) { + return tm.listActivePeerInfo(), nil + }) +} + +func (tm *transportManager) rpcPeerInfo() rpcserver.RPCHandler { + return rpcserver.RPCMethod1(func(ctx context.Context, nodeName string) (*pldapi.PeerInfo, error) { + return tm.getPeerInfo(nodeName), nil + }) +} diff --git a/core/go/internal/transportmgr/transportmgr_rpc_test.go b/core/go/internal/transportmgr/transportmgr_rpc_test.go index 1feb7a158..16f695996 100644 --- a/core/go/internal/transportmgr/transportmgr_rpc_test.go +++ b/core/go/internal/transportmgr/transportmgr_rpc_test.go @@ -23,6 +23,7 @@ import ( "github.com/go-resty/resty/v2" "github.com/kaleido-io/paladin/config/pkg/confutil" "github.com/kaleido-io/paladin/config/pkg/pldconf" + "github.com/kaleido-io/paladin/toolkit/pkg/pldapi" "github.com/kaleido-io/paladin/toolkit/pkg/prototk" "github.com/kaleido-io/paladin/toolkit/pkg/rpcclient" "github.com/kaleido-io/paladin/toolkit/pkg/rpcserver" @@ -38,13 +39,13 @@ func TestRPCLocalDetails(t *testing.T) { defer rpcDone() var nodeName string - err := rpc.CallRPC(ctx, &nodeName, "transport_nodeName") - require.NoError(t, err) + rpcErr := rpc.CallRPC(ctx, &nodeName, "transport_nodeName") + require.NoError(t, rpcErr) assert.Equal(t, "node1", nodeName) var localTransports []string - err = rpc.CallRPC(ctx, &localTransports, "transport_localTransports") - require.NoError(t, err) + rpcErr = rpc.CallRPC(ctx, &localTransports, "transport_localTransports") + require.NoError(t, rpcErr) assert.Equal(t, []string{tp.t.name}, localTransports) tp.Functions.GetLocalDetails = func(ctx context.Context, gldr *prototk.GetLocalDetailsRequest) (*prototk.GetLocalDetailsResponse, error) { @@ -54,10 +55,27 @@ func TestRPCLocalDetails(t *testing.T) { } var localTransportDetails string - err = 
rpc.CallRPC(ctx, &localTransportDetails, "transport_localTransportDetails", localTransports[0])
-	require.NoError(t, err)
+	rpcErr = rpc.CallRPC(ctx, &localTransportDetails, "transport_localTransportDetails", localTransports[0])
+	require.NoError(t, rpcErr)
 	assert.Equal(t, "some details", localTransportDetails)
 
+	_, err := tm.getPeer(ctx, "node2", false)
+	require.NoError(t, err)
+
+	var peers []*pldapi.PeerInfo
+	rpcErr = rpc.CallRPC(ctx, &peers, "transport_peers")
+	require.NoError(t, rpcErr)
+	require.Len(t, peers, 1)
+	require.Equal(t, "node2", peers[0].Name)
+
+	var peer *pldapi.PeerInfo
+	rpcErr = rpc.CallRPC(ctx, &peer, "transport_peerInfo", "node2")
+	require.NoError(t, rpcErr)
+	require.Equal(t, "node2", peer.Name)
+
+	rpcErr = rpc.CallRPC(ctx, &peer, "transport_peerInfo", "node3")
+	require.NoError(t, rpcErr)
+	require.Nil(t, peer)
+
 }
 
 func newTestRPCServer(t *testing.T, ctx context.Context, tm *transportManager) (rpcclient.Client, func()) {

From 2b62d3999047e32cda68be624d2d66bd496f9557 Mon Sep 17 00:00:00 2001
From: Peter Broadhurst
Date: Fri, 3 Jan 2025 10:49:22 -0500
Subject: [PATCH 22/41] Add peer RPC functions to client/docs

Signed-off-by: Peter Broadhurst
---
 toolkit/go/pkg/pldclient/transport.go | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/toolkit/go/pkg/pldclient/transport.go b/toolkit/go/pkg/pldclient/transport.go
index 2d6adad0d..048c92437 100644
--- a/toolkit/go/pkg/pldclient/transport.go
+++ b/toolkit/go/pkg/pldclient/transport.go
@@ -17,6 +17,8 @@ package pldclient
 
 import (
 	"context"
+
+	"github.com/kaleido-io/paladin/toolkit/pkg/pldapi"
 )
 
 type Transport interface {
@@ -24,6 +26,8 @@ type Transport interface {
 
 	NodeName(ctx context.Context) (nodeName string, err error)
 	LocalTransports(ctx context.Context) (transportNames []string, err error)
+	Peers(ctx context.Context) (peers []*pldapi.PeerInfo, err error)
+	PeerInfo(ctx context.Context, nodeName string) (peer *pldapi.PeerInfo, err error)
 }
 
 // This is necessary because there's no way to introspect function parameter names via reflection
@@ -68,3 +72,13 @@ func (t *transport) LocalTransportDetails(ctx context.Context, transportName str
 	err = t.c.CallRPC(ctx, &transportDetailsStr, "transport_localTransportDetails", transportName)
 	return
 }
+
+func (t *transport) Peers(ctx context.Context) (peers []*pldapi.PeerInfo, err error) {
+	err = t.c.CallRPC(ctx, &peers, "transport_peers")
+	return
+}
+
+func (t *transport) PeerInfo(ctx context.Context, nodeName string) (peer *pldapi.PeerInfo, err error) {
+	err = t.c.CallRPC(ctx, &peer, "transport_peerInfo", nodeName)
+	return
+}
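The two RPC methods added in the last two patches are plain JSON-RPC 2.0 calls, so they can also be tried by hand; a minimal sketch (the endpoint URL and port are illustrative, not something these patches define):

    curl -s http://127.0.0.1:8548 -H 'Content-Type: application/json' \
      -d '{"jsonrpc":"2.0","id":1,"method":"transport_peers","params":[]}'

    curl -s http://127.0.0.1:8548 -H 'Content-Type: application/json' \
      -d '{"jsonrpc":"2.0","id":2,"method":"transport_peerInfo","params":["node2"]}'

As the test above demonstrates, transport_peerInfo resolves to null for a node with no active peer connection, rather than returning an error.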
From 5fe2739b167c1bd7876b5eb1160c0d63d2f182fb Mon Sep 17 00:00:00 2001
From: Peter Broadhurst
Date: Fri, 3 Jan 2025 11:20:46 -0500
Subject: [PATCH 23/41] Reconcile interfaces for prepared TX

Signed-off-by: Peter Broadhurst
---
 .../000014_peer_queued_messages.up.sql        |   2 +
 .../sqlite/000014_peer_queued_messages.up.sql |   2 +
 core/go/internal/components/transaction.go    |  11 +-
 core/go/internal/components/txmgr.go          |   2 +-
 .../acknowledgment_writer.go                  |  94 --------
 .../prepared_transaction_distributer.go       | 225 ------------------
 .../prepared_transaction_receiver.go          |  55 -----
 .../prepared_transaction_sender.go            |  81 -------
 .../received_prepared_transaction_writer.go   | 102 --------
 .../transport_client.go                       | 102 --------
 .../privatetxnmgr/sequencer_dispatch.go       |  12 +-
 .../privatetxnmgr/syncpoints/dispatch.go      |   4 +-
 core/go/internal/transportmgr/peer.go         |   6 +-
 .../go/internal/txmgr/prepared_transaction.go |  24 +--
 .../txmgr/prepared_transaction_test.go        |  68 +++---
 toolkit/go/pkg/pldapi/transaction.go          |   8 +-
 16 files changed, 73 insertions(+), 725 deletions(-)
 delete mode 100644 core/go/internal/preparedtxdistribution/acknowledgment_writer.go
 delete mode 100644 core/go/internal/preparedtxdistribution/prepared_transaction_distributer.go
 delete mode 100644 core/go/internal/preparedtxdistribution/prepared_transaction_receiver.go
 delete mode 100644 core/go/internal/preparedtxdistribution/prepared_transaction_sender.go
 delete mode 100644 core/go/internal/preparedtxdistribution/received_prepared_transaction_writer.go
 delete mode 100644 core/go/internal/preparedtxdistribution/transport_client.go

diff --git a/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql b/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql
index 76fa86251..964b85731 100644
--- a/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql
+++ b/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql
@@ -3,6 +3,8 @@ BEGIN;
 -- These tables are replaced (data is not migrated from initial state distribution specific implementation)
 DROP TABLE state_distribution_acknowledgments;
 DROP TABLE state_distributions;
+DROP TABLE prepared_txn_distribution_acknowledgments;
+DROP TABLE prepared_txn_distributions;
 
 CREATE TABLE reliable_msgs (
   "sequence"  BIGINT          GENERATED ALWAYS AS IDENTITY,
diff --git a/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql b/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql
index 741e427e9..6345de101 100644
--- a/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql
+++ b/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql
@@ -3,6 +3,8 @@ BEGIN;
 -- These tables are replaced (data is not migrated from initial state distribution specific implementation)
 DROP TABLE state_distribution_acknowledgments;
 DROP TABLE state_distributions;
+DROP TABLE prepared_txn_distribution_acknowledgments;
+DROP TABLE prepared_txn_distributions;
 
 CREATE TABLE reliable_msgs (
   "sequence"  INTEGER         PRIMARY KEY AUTOINCREMENT,
diff --git a/core/go/internal/components/transaction.go b/core/go/internal/components/transaction.go
index 4038585c9..5b8b610cd 100644
--- a/core/go/internal/components/transaction.go
+++ b/core/go/internal/components/transaction.go
@@ -30,14 +30,9 @@ type TransactionStateRefs struct {
 	Info      []tktypes.HexBytes
 }
 
-type PrepareTransactionWithRefs struct {
-	ID          uuid.UUID                // ID of the original private transaction
-	Domain      string                   // domain of the original private transaction
-	To          *tktypes.EthAddress      // the private smart contract that was invoked
-	States      TransactionStateRefs     // the states associated with the original private transaction
-	Metadata    tktypes.RawJSON          // metadta produced from the prepare of the original private transaction, in addition to the prepared transaction
-	Transaction *pldapi.TransactionInput // the downstream transaction - might be public or private
-	Sender      string                   // the sender of the original private transaction
+type PreparedTransactionWithRefs struct {
+	pldapi.PreparedTransactionBase
+	StateRefs TransactionStateRefs `json:"stateRefs"` // the states associated with the original private transaction
 }
 
 type TransactionPreAssembly struct {
diff --git a/core/go/internal/components/txmgr.go b/core/go/internal/components/txmgr.go
index 28e10d27c..9a09083a5 100644
--- a/core/go/internal/components/txmgr.go
+++ b/core/go/internal/components/txmgr.go
@@ -108,5 +108,5 @@ type TXManager interface {
PrepareInternalPrivateTransaction(ctx context.Context, dbTX *gorm.DB, tx *pldapi.TransactionInput, submitMode pldapi.SubmitMode) (func(), *ValidatedTransaction, error) UpsertInternalPrivateTxsFinalizeIDs(ctx context.Context, dbTX *gorm.DB, txis []*ValidatedTransaction) (postCommit func(), err error) - WritePreparedTransactions(ctx context.Context, dbTX *gorm.DB, prepared []*PrepareTransactionWithRefs) (postCommit func(), err error) + WritePreparedTransactions(ctx context.Context, dbTX *gorm.DB, prepared []*PreparedTransactionWithRefs) (postCommit func(), err error) } diff --git a/core/go/internal/preparedtxdistribution/acknowledgment_writer.go b/core/go/internal/preparedtxdistribution/acknowledgment_writer.go deleted file mode 100644 index 1f9e18545..000000000 --- a/core/go/internal/preparedtxdistribution/acknowledgment_writer.go +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright © 2024 Kaleido, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the - * specific language governing permissions and limitations under the License. - * - * SPDX-License-Identifier: Apache-2.0 - */ - -package preparedtxdistribution - -import ( - "context" - - "github.com/google/uuid" - "github.com/kaleido-io/paladin/config/pkg/pldconf" - "github.com/kaleido-io/paladin/core/internal/flushwriter" - "github.com/kaleido-io/paladin/core/pkg/persistence" - "github.com/kaleido-io/paladin/toolkit/pkg/log" - "gorm.io/gorm" - "gorm.io/gorm/clause" -) - -type acknowledgementWriterNoResult struct{} -type acknowledgementWriteOperation struct { - PreparedTxnDistributionID string -} - -type acknowledgementWriter struct { - flushWriter flushwriter.Writer[*acknowledgementWriteOperation, *acknowledgementWriterNoResult] -} - -func NewAcknowledgementWriter(ctx context.Context, persistence persistence.Persistence, conf *pldconf.FlushWriterConfig) *acknowledgementWriter { - aw := &acknowledgementWriter{} - aw.flushWriter = flushwriter.NewWriter(ctx, aw.runBatch, persistence, conf, &pldconf.DistributerWriterConfigDefaults) - return aw -} - -func (wo *acknowledgementWriteOperation) WriteKey() string { - //no ordering requirements so just assign a worker at random for each write - return wo.PreparedTxnDistributionID -} - -type preparedTxnDistributionAcknowledgement struct { - PreparedTxnDistribution string `json:"preparedTxnDistribution" gorm:"column:prepared_txn_distribution"` - ID string `json:"id" gorm:"column:id"` -} - -func (aw *acknowledgementWriter) runBatch(ctx context.Context, tx *gorm.DB, values []*acknowledgementWriteOperation) (func(error), []flushwriter.Result[*acknowledgementWriterNoResult], error) { - log.L(ctx).Debugf("acknowledgementWriter:runBatch %d acknowledgements", len(values)) - - acknowledgements := make([]*preparedTxnDistributionAcknowledgement, 0, len(values)) - for _, value := range values { - acknowledgements = append(acknowledgements, &preparedTxnDistributionAcknowledgement{ - PreparedTxnDistribution: value.PreparedTxnDistributionID, - ID: uuid.New().String(), - }) - } - - err := tx. - Table("prepared_txn_distribution_acknowledgments"). 
- Clauses(clause.OnConflict{ - DoNothing: true, // immutable - }). - Create(acknowledgements). - Error - if err != nil { - log.L(ctx).Errorf("Error persisting prepared transaction distribution acknowledgements: %s", err) - } - - // We don't actually provide any result, so just build an array of nil results - return nil, make([]flushwriter.Result[*acknowledgementWriterNoResult], len(values)), err - -} - -func (aw *acknowledgementWriter) Start() { - aw.flushWriter.Start() -} - -func (aw *acknowledgementWriter) Stop() { - aw.flushWriter.Shutdown() -} - -func (aw *acknowledgementWriter) Queue(ctx context.Context, preparedTxnDistributionID string) { - aw.flushWriter.Queue(ctx, &acknowledgementWriteOperation{ - PreparedTxnDistributionID: preparedTxnDistributionID, - }) -} diff --git a/core/go/internal/preparedtxdistribution/prepared_transaction_distributer.go b/core/go/internal/preparedtxdistribution/prepared_transaction_distributer.go deleted file mode 100644 index c33d8c0a9..000000000 --- a/core/go/internal/preparedtxdistribution/prepared_transaction_distributer.go +++ /dev/null @@ -1,225 +0,0 @@ -/* - * Copyright © 2024 Kaleido, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the - * specific language governing permissions and limitations under the License. - * - * SPDX-License-Identifier: Apache-2.0 - */ - -package preparedtxdistribution - -import ( - "context" - "encoding/json" - "time" - - "github.com/google/uuid" - "github.com/kaleido-io/paladin/config/pkg/pldconf" - "github.com/kaleido-io/paladin/core/internal/components" - "github.com/kaleido-io/paladin/core/pkg/persistence" - "github.com/kaleido-io/paladin/toolkit/pkg/log" - "github.com/kaleido-io/paladin/toolkit/pkg/retry" - "github.com/kaleido-io/paladin/toolkit/pkg/tktypes" -) - -const RETRY_TIMEOUT = 5 * time.Second - -func NewPreparedTransactionDistributer(ctx context.Context, nodeID string, transportManager components.TransportManager, txMgr components.TXManager, persistence persistence.Persistence, conf *pldconf.DistributerConfig) PreparedTransactionDistributer { - sd := &preparedTransactionDistributer{ - persistence: persistence, - inputChan: make(chan *PreparedTxnDistribution), - retryChan: make(chan string), - acknowledgedChan: make(chan string), - pendingMap: make(map[string]*PreparedTxnDistribution), - txMgr: txMgr, - transportManager: transportManager, - nodeID: nodeID, - retry: retry.NewRetryIndefinite(&pldconf.RetryConfig{}, &pldconf.GenericRetryDefaults.RetryConfig), - } - sd.acknowledgementWriter = NewAcknowledgementWriter(ctx, sd.persistence, &conf.AcknowledgementWriter) - sd.receivedPreparedTransactionWriter = NewReceivedPreparedTransactionWriter(ctx, txMgr, persistence, &conf.ReceivedObjectWriter) - - return sd -} - -type PreparedTxnDistributionPersisted struct { - Created tktypes.Timestamp `json:"created" gorm:"column:created;autoCreateTime:nano"` - ID string `json:"id"` - PreparedTxnID string `json:"preparedTxnID"` - IdentityLocator string `json:"identityLocator"` - DomainName string `json:"domainName"` - ContractAddress string `json:"contractAddress"` -} - -// A PreparedTxnDistribution is an 
intent to send a prepared transaction to a remote party -type PreparedTxnDistribution struct { - ID string - PreparedTxnID string - IdentityLocator string - Domain string - ContractAddress string - PreparedTransactionJSON []byte -} - -/* -PreparedTransactionDistributer is a component that is responsible for distributing prepared transactions to remote parties - - it runs in its own goroutine and periodically sends prepared transactions to the intended recipients - until each recipient has acknowledged receipt of the prepared transaction. - - This operates on in-memory data but will initialize from persistent storage on startup -*/ -type PreparedTransactionDistributer interface { - Start(ctx context.Context) error - Stop(ctx context.Context) - DistributePreparedTransactions(ctx context.Context, preparedTxnDistributions []*PreparedTxnDistribution) -} - -type preparedTransactionDistributer struct { - runCtx context.Context - stopRunCtx context.CancelFunc - persistence persistence.Persistence - txMgr components.TXManager - inputChan chan *PreparedTxnDistribution - retryChan chan string - acknowledgedChan chan string - pendingMap map[string]*PreparedTxnDistribution - acknowledgementWriter *acknowledgementWriter - receivedPreparedTransactionWriter *receivedPreparedTransactionWriter - transportManager components.TransportManager - nodeID string - retry *retry.Retry -} - -func (sd *preparedTransactionDistributer) Start(bgCtx context.Context) error { - sd.runCtx, sd.stopRunCtx = context.WithCancel(bgCtx) - ctx := sd.runCtx - log.L(ctx).Info("preparedTransactionDistributer:Start") - - sd.acknowledgementWriter.Start() - sd.receivedPreparedTransactionWriter.Start() - - // TODO: This needs to be a worker per-peer - probably a whole distributor per peer that can be swapped in/out. - // Currently it only runs on startup, and pushes all prepared transaction distributions from before the startup time into the distributor. - startTime := tktypes.TimestampNow() - go func() { - page := 0 - dispatched := 0 - var lastEntry *PreparedTxnDistributionPersisted - finished := false - for !finished { - err := sd.retry.Do(ctx, func(attempt int) (retryable bool, err error) { - page++ - var preparedTxnDistributions []*PreparedTxnDistributionPersisted - query := sd.persistence.DB().Table("prepared_txn_distributions"). - Select("prepared_txn_distributions.*"). - Joins("LEFT JOIN prepared_txn_distribution_acknowledgments ON prepared_txn_distributions.id = prepared_txn_distribution_acknowledgments.prepared_txn_distribution"). - Where("prepared_txn_distribution_acknowledgments.id IS NULL"). - Where("created < ?", startTime). - Order("created"). 
- Limit(100) - if lastEntry != nil { - query = query.Where("created > ?", lastEntry.Created) - } - err = query.Find(&preparedTxnDistributions).Error - - if err != nil { - log.L(ctx).Errorf("Error getting prepared transaction distributions: %s", err) - return true, err - } - - log.L(ctx).Infof("preparedTransactionDistributer loaded %d prepared transaction distributions on startup (page=%d)", len(preparedTxnDistributions), page) - - for _, preparedTxnDistribution := range preparedTxnDistributions { - preparedTxnID, err := uuid.Parse(preparedTxnDistribution.PreparedTxnID) - if err != nil { - log.L(ctx).Errorf("Error parsing prepared transaction ID: %s", err) - continue - } - preparedTransaction, err := sd.txMgr.GetPreparedTransactionByID(ctx, sd.persistence.DB() /* no TX for now */, preparedTxnID) - if err != nil { - log.L(ctx).Errorf("Error getting prepared transaction: %s", err) - continue - } - - preparedTransactionJSON, err := json.Marshal(preparedTransaction) - if err != nil { - log.L(ctx).Errorf("Error marshalling prepared transaction: %s", err) - continue - } - - sd.inputChan <- &PreparedTxnDistribution{ - ID: preparedTxnDistribution.ID, - PreparedTxnID: preparedTxnDistribution.PreparedTxnID, - IdentityLocator: preparedTxnDistribution.IdentityLocator, - Domain: preparedTxnDistribution.DomainName, - ContractAddress: preparedTxnDistribution.ContractAddress, - PreparedTransactionJSON: preparedTransactionJSON, - } - - dispatched++ - lastEntry = preparedTxnDistribution - } - finished = (len(preparedTxnDistributions) == 0) - return false, nil - }) - if err != nil { - log.L(ctx).Warnf("exiting before sending all recovered prepared transaction distributions") - } - } - log.L(ctx).Infof("preparedTransactionDistributer finished startup recovery after dispatching %d distributions", dispatched) - }() - - go func() { - log.L(ctx).Info("preparedTransactionDistributer:Loop starting loop") - for { - log.L(ctx).Debug("preparedTransactionDistributer:Loop waiting for next event") - - select { - case <-ctx.Done(): - return - case preparedTxnDistributionID := <-sd.acknowledgedChan: - _, stillPending := sd.pendingMap[preparedTxnDistributionID] - if stillPending { - log.L(ctx).Debugf("preparedTransactionDistributer:Loop processing acknowledgment %s", preparedTxnDistributionID) - - delete(sd.pendingMap, preparedTxnDistributionID) - } else { - log.L(ctx).Debugf("preparedTransactionDistributer:Loop already received acknowledgment %s", preparedTxnDistributionID) - - } - //if we didn't find it in the map, it was already acknowledged - - case preparedTxnDistributionID := <-sd.retryChan: - - pendingDistribution, stillPending := sd.pendingMap[preparedTxnDistributionID] - if stillPending { - log.L(ctx).Debugf("preparedTransactionDistributer:Loop retrying %s", preparedTxnDistributionID) - sd.sendPreparedTransaction(ctx, pendingDistribution) - } - //if we didn't find it in the map, it was already acknowledged - - case preparedTxnDistribution := <-sd.inputChan: - log.L(ctx).Debugf("preparedTransactionDistributer:Loop new distribution %s", preparedTxnDistribution.ID) - - sd.pendingMap[preparedTxnDistribution.ID] = preparedTxnDistribution - sd.sendPreparedTransaction(ctx, preparedTxnDistribution) - - } - } - }() - return nil -} - -func (sd *preparedTransactionDistributer) Stop(ctx context.Context) { - sd.stopRunCtx() - sd.acknowledgementWriter.Stop() - sd.receivedPreparedTransactionWriter.Stop() -} diff --git a/core/go/internal/preparedtxdistribution/prepared_transaction_receiver.go 
b/core/go/internal/preparedtxdistribution/prepared_transaction_receiver.go deleted file mode 100644 index 7663891c2..000000000 --- a/core/go/internal/preparedtxdistribution/prepared_transaction_receiver.go +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright © 2024 Kaleido, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the - * specific language governing permissions and limitations under the License. - * - * SPDX-License-Identifier: Apache-2.0 - */ - -package preparedtxdistribution - -import ( - "context" - - "github.com/kaleido-io/paladin/core/internal/components" - pb "github.com/kaleido-io/paladin/core/pkg/proto/engine" - "github.com/kaleido-io/paladin/toolkit/pkg/log" - "google.golang.org/protobuf/proto" -) - -func (sd *preparedTransactionDistributer) sendPreparedTransactionAcknowledgement(ctx context.Context, domainName string, contractAddress string, preparedTxnId string, receivingParty string, distributingNode string, distributionID string) error { - log.L(ctx).Debugf("preparedTransactionDistributer:sendPreparedTransactionAcknowledgement domainName=%s contractAddress=%s preparedTxnId=%s receivingParty=%s distributingNode=%s distributionID=%s", domainName, contractAddress, preparedTxnId, receivingParty, distributingNode, distributionID) - preparedTransactionAcknowledgedMessage := &pb.PreparedTransactionAcknowledgedMessage{ - DomainName: domainName, - ContractAddress: contractAddress, - PreparedTxnId: preparedTxnId, - Party: receivingParty, - DistributionId: distributionID, - } - preparedTransactionAcknowledgedMessageBytes, err := proto.Marshal(preparedTransactionAcknowledgedMessage) - if err != nil { - log.L(ctx).Errorf("Error marshalling prepared transaction acknowledgment event: %s", err) - return err - } - - err = sd.transportManager.Send(ctx, &components.TransportMessage{ - MessageType: "PreparedTransactionAcknowledgedMessage", - Payload: preparedTransactionAcknowledgedMessageBytes, - Node: distributingNode, - Component: PREPARED_TRANSACTION_DISTRIBUTER_DESTINATION, - ReplyTo: sd.nodeID, - }) - if err != nil { - log.L(ctx).Errorf("Error sending prepared transaction produced event: %s", err) - return err - } - - return nil -} diff --git a/core/go/internal/preparedtxdistribution/prepared_transaction_sender.go b/core/go/internal/preparedtxdistribution/prepared_transaction_sender.go deleted file mode 100644 index 5e76b5f1c..000000000 --- a/core/go/internal/preparedtxdistribution/prepared_transaction_sender.go +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright © 2024 Kaleido, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the - * specific language governing permissions and limitations under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - */ - -package preparedtxdistribution - -import ( - "context" - "time" - - "github.com/kaleido-io/paladin/core/internal/components" - pb "github.com/kaleido-io/paladin/core/pkg/proto/engine" - "github.com/kaleido-io/paladin/toolkit/pkg/log" - "github.com/kaleido-io/paladin/toolkit/pkg/tktypes" - "google.golang.org/protobuf/proto" -) - -func (sd *preparedTransactionDistributer) DistributePreparedTransactions(ctx context.Context, preparedTxnDistributions []*PreparedTxnDistribution) { - log.L(ctx).Debugf("preparedTransactionDistributer:DistributePreparedTransactions %d prepared transaction distributions", len(preparedTxnDistributions)) - for _, preparedTxnDistribution := range preparedTxnDistributions { - sd.inputChan <- preparedTxnDistribution - } -} - -func (sd *preparedTransactionDistributer) sendPreparedTransaction(ctx context.Context, preparedTxnDistribution *PreparedTxnDistribution) { - log.L(ctx).Debugf("preparedTransactionDistributer:sendPreparedTransaction Domain: %s, ContractAddress: %s, PreparedTxnID: %s, IdentityLocator: %s, ID: %s", - preparedTxnDistribution.Domain, - preparedTxnDistribution.ContractAddress, - preparedTxnDistribution.PreparedTxnID, - preparedTxnDistribution.IdentityLocator, - preparedTxnDistribution.ID) - - preparedTransactionMessage := &pb.PreparedTransactionMessage{ - DomainName: preparedTxnDistribution.Domain, - ContractAddress: preparedTxnDistribution.ContractAddress, - PreparedTxnId: preparedTxnDistribution.PreparedTxnID, - Party: preparedTxnDistribution.IdentityLocator, - DistributionId: preparedTxnDistribution.ID, - PreparedTransactionJson: preparedTxnDistribution.PreparedTransactionJSON, - } - preparedTransactionMessageBytes, err := proto.Marshal(preparedTransactionMessage) - if err != nil { - log.L(ctx).Errorf("Error marshalling delegate transaction message: %s", err) - return - } - - targetNode, err := tktypes.PrivateIdentityLocator(preparedTxnDistribution.IdentityLocator).Node(ctx, false) - if err != nil { - log.L(ctx).Errorf("Error getting node for party %s", preparedTxnDistribution.IdentityLocator) - return - } - - err = sd.transportManager.Send(ctx, &components.TransportMessage{ - MessageType: "PreparedTransactionMessage", - Payload: preparedTransactionMessageBytes, - Node: targetNode, - Component: PREPARED_TRANSACTION_DISTRIBUTER_DESTINATION, - ReplyTo: sd.nodeID, - }) - if err != nil { - log.L(ctx).Errorf("Error sending prepared transaction produced event: %s", err) - return - } - - go func() { - time.Sleep(RETRY_TIMEOUT) - sd.retryChan <- preparedTxnDistribution.ID - }() - -} diff --git a/core/go/internal/preparedtxdistribution/received_prepared_transaction_writer.go b/core/go/internal/preparedtxdistribution/received_prepared_transaction_writer.go deleted file mode 100644 index 7d21c7039..000000000 --- a/core/go/internal/preparedtxdistribution/received_prepared_transaction_writer.go +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright © 2024 Kaleido, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the - * specific language governing permissions and limitations under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - */ - -package preparedtxdistribution - -import ( - "context" - - "github.com/kaleido-io/paladin/config/pkg/pldconf" - "github.com/kaleido-io/paladin/core/internal/components" - "github.com/kaleido-io/paladin/core/internal/flushwriter" - "github.com/kaleido-io/paladin/core/pkg/persistence" - "github.com/kaleido-io/paladin/toolkit/pkg/log" - "github.com/kaleido-io/paladin/toolkit/pkg/tktypes" - "gorm.io/gorm" -) - -type receivedPreparedTransactionWriterNoResult struct{} - -type receivedPreparedTransactionWriteOperation struct { - DomainName string - ContractAddress tktypes.EthAddress - PreparedTransaction *components.PrepareTransactionWithRefs -} - -type receivedPreparedTransactionWriter struct { - flushWriter flushwriter.Writer[*receivedPreparedTransactionWriteOperation, *receivedPreparedTransactionWriterNoResult] - txMgr components.TXManager -} - -func NewReceivedPreparedTransactionWriter(ctx context.Context, txMgr components.TXManager, persistence persistence.Persistence, conf *pldconf.FlushWriterConfig) *receivedPreparedTransactionWriter { - rsw := &receivedPreparedTransactionWriter{ - txMgr: txMgr, - } - rsw.flushWriter = flushwriter.NewWriter(ctx, rsw.runBatch, persistence, conf, &pldconf.DistributerWriterConfigDefaults) - return rsw -} - -func (wo *receivedPreparedTransactionWriteOperation) WriteKey() string { - return wo.DomainName -} - -func (rsw *receivedPreparedTransactionWriter) runBatch(ctx context.Context, dbTX *gorm.DB, values []*receivedPreparedTransactionWriteOperation) (func(error), []flushwriter.Result[*receivedPreparedTransactionWriterNoResult], error) { - log.L(ctx).Debugf("receivedPreparedTransactionWriter:runBatch %d acknowledgements", len(values)) - - if len(values) == 0 { - return nil, nil, nil - } - - preparedTransactions := make([]*components.PrepareTransactionWithRefs, len(values)) - for i, receivedPreparedTransactionWriteOperation := range values { - - preparedTransactions[i] = receivedPreparedTransactionWriteOperation.PreparedTransaction - } - postCommit, err := rsw.txMgr.WritePreparedTransactions(ctx, dbTX, preparedTransactions) - if err != nil { - log.L(ctx).Errorf("Error persisting prepared transactions: %s", err) - return nil, nil, err - } - // We don't actually provide any result, so just build an array of nil results - return func(err error) { - if err == nil { - postCommit() - } - }, make([]flushwriter.Result[*receivedPreparedTransactionWriterNoResult], len(values)), nil - -} - -func (rsw *receivedPreparedTransactionWriter) Start() { - rsw.flushWriter.Start() -} - -func (rsw *receivedPreparedTransactionWriter) Stop() { - rsw.flushWriter.Shutdown() -} - -func (rsw *receivedPreparedTransactionWriter) QueueAndWait(ctx context.Context, domainName string, contractAddress tktypes.EthAddress, receivedTransaction *components.PrepareTransactionWithRefs) error { - log.L(ctx).Debugf("receivedPreparedTransactionWriter:QueueAndWait %s %s ", domainName, contractAddress) - - op := rsw.flushWriter.Queue(ctx, &receivedPreparedTransactionWriteOperation{ - DomainName: domainName, - ContractAddress: contractAddress, - PreparedTransaction: receivedTransaction, - }) - _, err := op.WaitFlushed(ctx) - if err != nil { - log.L(ctx).Errorf("Error waiting for prepared transaction distribution write: %s", err) - } - return err -} diff --git a/core/go/internal/preparedtxdistribution/transport_client.go b/core/go/internal/preparedtxdistribution/transport_client.go deleted file mode 100644 index e23ab6ec3..000000000 --- 
a/core/go/internal/preparedtxdistribution/transport_client.go +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright © 2024 Kaleido, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the - * specific language governing permissions and limitations under the License. - * - * SPDX-License-Identifier: Apache-2.0 - */ - -package preparedtxdistribution - -import ( - "context" - "encoding/json" - - "github.com/kaleido-io/paladin/core/internal/components" - pb "github.com/kaleido-io/paladin/core/pkg/proto/engine" - "github.com/kaleido-io/paladin/toolkit/pkg/log" - "github.com/kaleido-io/paladin/toolkit/pkg/tktypes" - "google.golang.org/protobuf/proto" -) - -func (sd *preparedTransactionDistributer) HandlePaladinMsg(ctx context.Context, message *components.TransportMessage) { - log.L(ctx).Debugf("preparedTransactionDistributer:HandlePaladinMsg") - messagePayload := message.Payload - - switch message.MessageType { - case "PreparedTransactionMessage": - distributingNode := message.ReplyTo - go sd.handlePreparedTransactionMessage(ctx, messagePayload, distributingNode) - case "PreparedTransactionAcknowledgedMessage": - go sd.handlePreparedTransactionAcknowledgedMessage(ctx, message.Payload) - default: - log.L(ctx).Errorf("Unknown message type: %s", message.MessageType) - } -} - -func (sd *preparedTransactionDistributer) handlePreparedTransactionMessage(ctx context.Context, messagePayload []byte, distributingNode string) { - log.L(ctx).Debugf("preparedTransactionDistributer:handlePreparedTransactionMessage") - preparedTransactionMessage := &pb.PreparedTransactionMessage{} - err := proto.Unmarshal(messagePayload, preparedTransactionMessage) - if err != nil { - log.L(ctx).Errorf("Failed to unmarshal PreparedTransactionMessage: %s", err) - return - } - - receivedTransaction := new(components.PrepareTransactionWithRefs) - err = json.Unmarshal(preparedTransactionMessage.PreparedTransactionJson, receivedTransaction) - if err != nil { - log.L(ctx).Errorf("Error unmarshalling prepared transaction json: %s", err) - } - - err = sd.receivedPreparedTransactionWriter.QueueAndWait( - ctx, - preparedTransactionMessage.DomainName, - *tktypes.MustEthAddress(preparedTransactionMessage.ContractAddress), - receivedTransaction, - ) - if err != nil { - log.L(ctx).Errorf("Error writing prepared transaction: %s", err) - //don't send the acknowledgement, with a bit of luck, the sender will retry and we will get it next time - return - } - - // No error means either this is the first time we have received this prepared transaction or we already have it an onConflict ignore means we idempotently accept it - // If the latter, then the sender probably didn't get our previous acknowledgement so either way, we send an acknowledgement - - err = sd.sendPreparedTransactionAcknowledgement( - ctx, - preparedTransactionMessage.DomainName, - preparedTransactionMessage.ContractAddress, - preparedTransactionMessage.PreparedTxnId, - preparedTransactionMessage.Party, - distributingNode, - preparedTransactionMessage.DistributionId) - if err != nil { - log.L(ctx).Errorf("Error sending prepared transaction acknowledgement: %s", err) - 
//not much more we can do here. The sender will inevitably retry and we will hopefully send the ack next time - } -} - -func (sd *preparedTransactionDistributer) handlePreparedTransactionAcknowledgedMessage(ctx context.Context, messagePayload []byte) { - log.L(ctx).Debugf("preparedTransactionDistributer:handlePreparedTransactionAcknowledgedMessage") - preparedTransactionAcknowledgedMessage := &pb.PreparedTransactionAcknowledgedMessage{} - err := proto.Unmarshal(messagePayload, preparedTransactionAcknowledgedMessage) - if err != nil { - log.L(ctx).Errorf("Failed to unmarshal PreparedTransactionAcknowledgedMessage: %s", err) - return - } - sd.acknowledgementWriter.Queue(ctx, preparedTransactionAcknowledgedMessage.DistributionId) - // no need to wait for the flush to complete, we can just stop the in memory loop from retrying - // worst case scenario, we crash before this is written to the DB, we do some redundant retries after a restart - // but waiting for the flush here is not going to prevent that - sd.acknowledgedChan <- preparedTransactionAcknowledgedMessage.DistributionId - -} diff --git a/core/go/internal/privatetxnmgr/sequencer_dispatch.go b/core/go/internal/privatetxnmgr/sequencer_dispatch.go index 176e6572d..b9d8b6b43 100644 --- a/core/go/internal/privatetxnmgr/sequencer_dispatch.go +++ b/core/go/internal/privatetxnmgr/sequencer_dispatch.go @@ -216,8 +216,8 @@ func (s *Sequencer) DispatchTransactions(ctx context.Context, dispatchableTransa } -func mapPreparedTransaction(tx *components.PrivateTransaction) *components.PrepareTransactionWithRefs { - pt := &components.PrepareTransactionWithRefs{ +func mapPreparedTransaction(tx *components.PrivateTransaction) *components.PreparedTransactionWithRefs { + pt := &components.PreparedTransactionWithRefs{ ID: tx.ID, Domain: tx.Domain, To: &tx.Address, @@ -225,16 +225,16 @@ func mapPreparedTransaction(tx *components.PrivateTransaction) *components.Prepa Sender: tx.PreAssembly.TransactionSpecification.From, } for _, s := range tx.PostAssembly.InputStates { - pt.States.Spent = append(pt.States.Spent, s.ID) + pt.StateRefs.Spent = append(pt.StateRefs.Spent, s.ID) } for _, s := range tx.PostAssembly.ReadStates { - pt.States.Read = append(pt.States.Read, s.ID) + pt.StateRefs.Read = append(pt.StateRefs.Read, s.ID) } for _, s := range tx.PostAssembly.OutputStates { - pt.States.Confirmed = append(pt.States.Confirmed, s.ID) + pt.StateRefs.Confirmed = append(pt.StateRefs.Confirmed, s.ID) } for _, s := range tx.PostAssembly.InfoStates { - pt.States.Info = append(pt.States.Info, s.ID) + pt.StateRefs.Info = append(pt.StateRefs.Info, s.ID) } if tx.PreparedPublicTransaction != nil { pt.Transaction = tx.PreparedPublicTransaction diff --git a/core/go/internal/privatetxnmgr/syncpoints/dispatch.go b/core/go/internal/privatetxnmgr/syncpoints/dispatch.go index 4f9719da4..8ae4864e6 100644 --- a/core/go/internal/privatetxnmgr/syncpoints/dispatch.go +++ b/core/go/internal/privatetxnmgr/syncpoints/dispatch.go @@ -31,7 +31,7 @@ import ( type dispatchOperation struct { publicDispatches []*PublicDispatch privateDispatches []*components.ValidatedTransaction - preparedTransactions []*components.PrepareTransactionWithRefs + preparedTransactions []*components.PreparedTransactionWithRefs preparedTxnDistributions []*preparedtxdistribution.PreparedTxnDistributionPersisted } @@ -53,7 +53,7 @@ type PublicDispatch struct { type DispatchBatch struct { PublicDispatches []*PublicDispatch PrivateDispatches []*components.ValidatedTransaction - PreparedTransactions 
[]*components.PrepareTransactionWithRefs + PreparedTransactions []*components.PreparedTransactionWithRefs } // PersistDispatches persists the dispatches to the database and coordinates with the public transaction manager diff --git a/core/go/internal/transportmgr/peer.go b/core/go/internal/transportmgr/peer.go index e14187fc1..6480b99f8 100644 --- a/core/go/internal/transportmgr/peer.go +++ b/core/go/internal/transportmgr/peer.go @@ -191,11 +191,11 @@ func (tm *transportManager) connectPeer(ctx context.Context, nodeName string, se tm.peers[nodeName] = p if sending { - if err := p.startSender(); err != nil { + p.OutboundError = p.startSender() + if p.OutboundError != nil { // Note the peer is still in our list, but not connected for send. // This means status can be reported for it. - p.OutboundError = err - return nil, err + return nil, p.OutboundError } } diff --git a/core/go/internal/txmgr/prepared_transaction.go b/core/go/internal/txmgr/prepared_transaction.go index c84e26827..9bc377960 100644 --- a/core/go/internal/txmgr/prepared_transaction.go +++ b/core/go/internal/txmgr/prepared_transaction.go @@ -71,7 +71,7 @@ var preparedTransactionFilters = filters.FieldMap{ "created": filters.TimestampField("created"), } -func (tm *txManager) WritePreparedTransactions(ctx context.Context, dbTX *gorm.DB, prepared []*components.PrepareTransactionWithRefs) (postCommit func(), err error) { +func (tm *txManager) WritePreparedTransactions(ctx context.Context, dbTX *gorm.DB, prepared []*components.PreparedTransactionWithRefs) (postCommit func(), err error) { var preparedTxInserts []*preparedTransaction var preparedTxStateInserts []*preparedTransactionState @@ -84,7 +84,7 @@ func (tm *txManager) WritePreparedTransactions(ctx context.Context, dbTX *gorm.D Metadata: p.Metadata, } // We do the work for the ABI validation etc. 
before we insert the TX - txPostCommit, resolved, err := tm.resolveNewTransaction(ctx, dbTX, p.Transaction, pldapi.SubmitModePrepare) + txPostCommit, resolved, err := tm.resolveNewTransaction(ctx, dbTX, &p.Transaction, pldapi.SubmitModePrepare) if err == nil { p.Transaction.ABI = nil // move to the reference p.Transaction.ABIReference = resolved.Function.ABIReference @@ -96,7 +96,7 @@ func (tm *txManager) WritePreparedTransactions(ctx context.Context, dbTX *gorm.D } postCommits = append(postCommits, txPostCommit) preparedTxInserts = append(preparedTxInserts, dbPreparedTx) - for i, stateID := range p.States.Spent { + for i, stateID := range p.StateRefs.Spent { preparedTxStateInserts = append(preparedTxStateInserts, &preparedTransactionState{ Transaction: p.ID, Type: preparedSpend, @@ -105,7 +105,7 @@ func (tm *txManager) WritePreparedTransactions(ctx context.Context, dbTX *gorm.D StateIdx: i, }) } - for i, stateID := range p.States.Read { + for i, stateID := range p.StateRefs.Read { preparedTxStateInserts = append(preparedTxStateInserts, &preparedTransactionState{ Transaction: p.ID, Type: preparedRead, @@ -114,7 +114,7 @@ func (tm *txManager) WritePreparedTransactions(ctx context.Context, dbTX *gorm.D StateIdx: i, }) } - for i, stateID := range p.States.Confirmed { + for i, stateID := range p.StateRefs.Confirmed { preparedTxStateInserts = append(preparedTxStateInserts, &preparedTransactionState{ Transaction: p.ID, Type: preparedConfirm, @@ -123,7 +123,7 @@ func (tm *txManager) WritePreparedTransactions(ctx context.Context, dbTX *gorm.D StateIdx: i, }) } - for i, stateID := range p.States.Info { + for i, stateID := range p.StateRefs.Info { preparedTxStateInserts = append(preparedTxStateInserts, &preparedTransactionState{ Transaction: p.ID, Type: preparedInfo, @@ -133,7 +133,7 @@ func (tm *txManager) WritePreparedTransactions(ctx context.Context, dbTX *gorm.D }) } log.L(ctx).Infof("Inserting prepared %s transaction for transaction %s with spent=%d read=%d confirmed=%d info=%d", - p.Transaction.Type, p.ID, len(p.States.Spent), len(p.States.Read), len(p.States.Confirmed), len(p.States.Info)) + p.Transaction.Type, p.ID, len(p.StateRefs.Spent), len(p.StateRefs.Read), len(p.StateRefs.Confirmed), len(p.StateRefs.Info)) } if len(preparedTxInserts) > 0 { @@ -168,10 +168,12 @@ func (tm *txManager) QueryPreparedTransactions(ctx context.Context, dbTX *gorm.D query: jq, mapResult: func(pt *preparedTransaction) (*pldapi.PreparedTransaction, error) { preparedTx := &pldapi.PreparedTransaction{ - ID: pt.ID, - Domain: pt.Domain, - To: pt.To, - Metadata: pt.Metadata, + PreparedTransactionBase: pldapi.PreparedTransactionBase{ + ID: pt.ID, + Domain: pt.Domain, + To: pt.To, + Metadata: pt.Metadata, + }, } return preparedTx, json.Unmarshal(pt.Transaction, &preparedTx.Transaction) }, diff --git a/core/go/internal/txmgr/prepared_transaction_test.go b/core/go/internal/txmgr/prepared_transaction_test.go index 53f90ae23..d93343177 100644 --- a/core/go/internal/txmgr/prepared_transaction_test.go +++ b/core/go/internal/txmgr/prepared_transaction_test.go @@ -131,28 +131,30 @@ func TestPreparedTransactionRealDB(t *testing.T) { info, infoIDs := writeStates(t, txm, testSchemaID, contractAddressDomain1, 1) childFnABI := abi.ABI{{Type: abi.Function, Name: "doThing2"}} - ptInsert := &components.PrepareTransactionWithRefs{ - ID: *parentTx.Transaction.ID, - Domain: parentTx.Transaction.Domain, - To: &contractAddressDomain1, - Transaction: &pldapi.TransactionInput{ - TransactionBase: pldapi.TransactionBase{ - From: "me@node1", - 
IdempotencyKey: "child_txn", - Type: pldapi.TransactionTypePrivate.Enum(), - Domain: "domain2", - To: &contractAddressDomain2, - Function: "doThing2", + ptInsert := &components.PreparedTransactionWithRefs{ + PreparedTransactionBase: pldapi.PreparedTransactionBase{ + ID: *parentTx.Transaction.ID, + Domain: parentTx.Transaction.Domain, + To: &contractAddressDomain1, + Metadata: tktypes.RawJSON(`{"some":"data"}`), + Transaction: pldapi.TransactionInput{ + TransactionBase: pldapi.TransactionBase{ + From: "me@node1", + IdempotencyKey: "child_txn", + Type: pldapi.TransactionTypePrivate.Enum(), + Domain: "domain2", + To: &contractAddressDomain2, + Function: "doThing2", + }, + ABI: childFnABI, }, - ABI: childFnABI, }, - States: components.TransactionStateRefs{ + StateRefs: components.TransactionStateRefs{ Spent: spentIDs, Read: readIDs, Confirmed: confirmIDs, Info: infoIDs, }, - Metadata: tktypes.RawJSON(`{"some":"data"}`), } postCommit, storedABI, err := txm.UpsertABI(ctx, txm.p.DB(), childFnABI) @@ -160,7 +162,7 @@ func TestPreparedTransactionRealDB(t *testing.T) { postCommit() // Write the prepared TX it results in - postCommit, err = txm.WritePreparedTransactions(ctx, txm.p.DB(), []*components.PrepareTransactionWithRefs{ptInsert}) + postCommit, err = txm.WritePreparedTransactions(ctx, txm.p.DB(), []*components.PreparedTransactionWithRefs{ptInsert}) require.NoError(t, err) postCommit() @@ -168,20 +170,23 @@ func TestPreparedTransactionRealDB(t *testing.T) { pt, err := txm.GetPreparedTransactionByID(ctx, txm.p.DB(), *parentTx.Transaction.ID) require.NoError(t, err) require.Equal(t, &pldapi.PreparedTransaction{ - ID: *parentTx.Transaction.ID, - Domain: "domain1", - To: &contractAddressDomain1, - Transaction: pldapi.TransactionInput{ - TransactionBase: pldapi.TransactionBase{ - From: "me@node1", - IdempotencyKey: "child_txn", - Type: pldapi.TransactionTypePrivate.Enum(), - Domain: "domain2", - To: &contractAddressDomain2, - Function: "doThing2()", // now fully qualified - ABIReference: &storedABI.Hash, // now resolved - Data: tktypes.RawJSON(`{}`), // normalized + PreparedTransactionBase: pldapi.PreparedTransactionBase{ + ID: *parentTx.Transaction.ID, + Domain: "domain1", + To: &contractAddressDomain1, + Transaction: pldapi.TransactionInput{ + TransactionBase: pldapi.TransactionBase{ + From: "me@node1", + IdempotencyKey: "child_txn", + Type: pldapi.TransactionTypePrivate.Enum(), + Domain: "domain2", + To: &contractAddressDomain2, + Function: "doThing2()", // now fully qualified + ABIReference: &storedABI.Hash, // now resolved + Data: tktypes.RawJSON(`{}`), // normalized + }, }, + Metadata: tktypes.RawJSON(`{"some":"data"}`), }, States: pldapi.TransactionStates{ Spent: spent, @@ -189,7 +194,6 @@ func TestPreparedTransactionRealDB(t *testing.T) { Confirmed: confirm, Info: info, }, - Metadata: tktypes.RawJSON(`{"some":"data"}`), }, pt) } @@ -199,9 +203,7 @@ func TestWritePreparedTransactionsBadTX(t *testing.T) { ctx, txm, done := newTestTransactionManager(t, false) defer done() - _, err := txm.WritePreparedTransactions(ctx, txm.p.DB(), []*components.PrepareTransactionWithRefs{{ - Transaction: &pldapi.TransactionInput{}, - }}) + _, err := txm.WritePreparedTransactions(ctx, txm.p.DB(), []*components.PreparedTransactionWithRefs{{}}) assert.Regexp(t, "PD012211", err) } diff --git a/toolkit/go/pkg/pldapi/transaction.go b/toolkit/go/pkg/pldapi/transaction.go index cbc567598..5013ed90f 100644 --- a/toolkit/go/pkg/pldapi/transaction.go +++ b/toolkit/go/pkg/pldapi/transaction.go @@ -163,11 +163,15 @@ type 
TransactionDependencies struct { PrereqOf []uuid.UUID `docstruct:"TransactionDependencies" json:"prereqOf"` } -type PreparedTransaction struct { +type PreparedTransactionBase struct { ID uuid.UUID `docstruct:"PreparedTransaction" json:"id"` Domain string `docstruct:"PreparedTransaction" json:"domain"` To *tktypes.EthAddress `docstruct:"PreparedTransaction" json:"to"` Transaction TransactionInput `docstruct:"PreparedTransaction" json:"transaction"` Metadata tktypes.RawJSON `docstruct:"PreparedTransaction" json:"metadata,omitempty"` - States TransactionStates `docstruct:"PreparedTransaction" json:"states"` +} + +type PreparedTransaction struct { + PreparedTransactionBase + States TransactionStates `docstruct:"PreparedTransaction" json:"states"` } From 60f76c4771f0999e32135a6752bd97df7f3fdaea Mon Sep 17 00:00:00 2001 From: Peter Broadhurst Date: Sat, 4 Jan 2025 11:36:35 -0500 Subject: [PATCH 24/41] Add support for receipt and prepared TX reliable msgs Signed-off-by: Peter Broadhurst --- core/go/build.gradle | 6 - core/go/internal/components/transaction.go | 2 +- core/go/internal/components/transportmgr.go | 5 +- core/go/internal/components/txmgr.go | 2 + .../internal/privatetxnmgr/private_txn_mgr.go | 38 +-- core/go/internal/privatetxnmgr/sequencer.go | 39 ++- .../privatetxnmgr/sequencer_dispatch.go | 24 +- .../internal/privatetxnmgr/sequencer_test.go | 63 ++--- .../privatetxnmgr/syncpoints/dispatch.go | 12 +- .../privatetxnmgr/syncpoints/syncpoints.go | 3 +- core/go/internal/transportmgr/manager.go | 2 + core/go/internal/transportmgr/manager_test.go | 3 + core/go/internal/transportmgr/peer.go | 30 +- core/go/internal/transportmgr/peer_test.go | 2 +- .../transportmgr/reliable_msg_handler.go | 91 ++++++- .../transportmgr/reliable_msg_handler_test.go | 256 ++++++++++++++---- .../go/internal/txmgr/prepared_transaction.go | 96 ++++++- .../txmgr/prepared_transaction_test.go | 105 +++++-- toolkit/go/pkg/pldapi/transaction.go | 2 +- 19 files changed, 538 insertions(+), 243 deletions(-) diff --git a/core/go/build.gradle b/core/go/build.gradle index 545e31283..cd3c402c1 100644 --- a/core/go/build.gradle +++ b/core/go/build.gradle @@ -231,12 +231,6 @@ task makeMocks(type: Mockery, dependsOn: [":installMockery", protoc, goGet]) { outputDir 'mocks/ethclientmocks' } mock { - inputDir 'internal/preparedtxdistribution' - includeAll true - outputPackage 'preparedtxdistributionmocks' - outputDir 'mocks/preparedtxdistributionmocks' - } - mock { inputDir 'internal/privatetxnmgr/ptmgrtypes' name "TransactionFlow" inpackage true diff --git a/core/go/internal/components/transaction.go b/core/go/internal/components/transaction.go index 5b8b610cd..c330d7005 100644 --- a/core/go/internal/components/transaction.go +++ b/core/go/internal/components/transaction.go @@ -31,7 +31,7 @@ type TransactionStateRefs struct { } type PreparedTransactionWithRefs struct { - pldapi.PreparedTransactionBase + *pldapi.PreparedTransactionBase StateRefs TransactionStateRefs `json:"stateRefs"` // the states associated with the original private transaction } diff --git a/core/go/internal/components/transportmgr.go b/core/go/internal/components/transportmgr.go index f2f7a83c8..ef0383481 100644 --- a/core/go/internal/components/transportmgr.go +++ b/core/go/internal/components/transportmgr.go @@ -38,8 +38,9 @@ type FireAndForgetMessageSend struct { type ReliableMessageType string const ( - RMTState ReliableMessageType = "state" - RMTReceipt ReliableMessageType = "receipt" + RMTState ReliableMessageType = "state" + RMTReceipt 
ReliableMessageType = "receipt" + RMTPreparedTransaction ReliableMessageType = "prepared_txn" ) func (t ReliableMessageType) Enum() tktypes.Enum[ReliableMessageType] { diff --git a/core/go/internal/components/txmgr.go b/core/go/internal/components/txmgr.go index 9a09083a5..8aa04bb1d 100644 --- a/core/go/internal/components/txmgr.go +++ b/core/go/internal/components/txmgr.go @@ -100,7 +100,9 @@ type TXManager interface { QueryTransactionReceipts(ctx context.Context, jq *query.QueryJSON) ([]*pldapi.TransactionReceipt, error) GetTransactionReceiptByID(ctx context.Context, id uuid.UUID) (*pldapi.TransactionReceipt, error) GetPreparedTransactionByID(ctx context.Context, dbTX *gorm.DB, id uuid.UUID) (*pldapi.PreparedTransaction, error) + GetPreparedTransactionWithRefsByID(ctx context.Context, dbTX *gorm.DB, id uuid.UUID) (*PreparedTransactionWithRefs, error) QueryPreparedTransactions(ctx context.Context, dbTX *gorm.DB, jq *query.QueryJSON) ([]*pldapi.PreparedTransaction, error) + QueryPreparedTransactionsWithRefs(ctx context.Context, dbTX *gorm.DB, jq *query.QueryJSON) ([]*PreparedTransactionWithRefs, error) CallTransaction(ctx context.Context, result any, tx *pldapi.TransactionCall) (err error) UpsertABI(ctx context.Context, dbTX *gorm.DB, a abi.ABI) (func(), *pldapi.StoredABI, error) diff --git a/core/go/internal/privatetxnmgr/private_txn_mgr.go b/core/go/internal/privatetxnmgr/private_txn_mgr.go index 872f3426e..0f59f0c60 100644 --- a/core/go/internal/privatetxnmgr/private_txn_mgr.go +++ b/core/go/internal/privatetxnmgr/private_txn_mgr.go @@ -24,7 +24,6 @@ import ( "github.com/hyperledger/firefly-common/pkg/i18n" "github.com/hyperledger/firefly-signer/pkg/abi" "github.com/kaleido-io/paladin/core/internal/components" - "github.com/kaleido-io/paladin/core/internal/preparedtxdistribution" "github.com/kaleido-io/paladin/core/internal/privatetxnmgr/ptmgrtypes" "github.com/kaleido-io/paladin/core/internal/privatetxnmgr/syncpoints" "gorm.io/gorm" @@ -46,19 +45,18 @@ import ( ) type privateTxManager struct { - ctx context.Context - ctxCancel func() - config *pldconf.PrivateTxManagerConfig - sequencers map[string]*Sequencer - sequencersLock sync.RWMutex - endorsementGatherers map[string]ptmgrtypes.EndorsementGatherer - components components.AllComponents - nodeName string - subscribers []components.PrivateTxEventSubscriber - subscribersLock sync.Mutex - syncPoints syncpoints.SyncPoints - preparedTransactionDistributer preparedtxdistribution.PreparedTransactionDistributer - blockHeight int64 + ctx context.Context + ctxCancel func() + config *pldconf.PrivateTxManagerConfig + sequencers map[string]*Sequencer + sequencersLock sync.RWMutex + endorsementGatherers map[string]ptmgrtypes.EndorsementGatherer + components components.AllComponents + nodeName string + subscribers []components.PrivateTxEventSubscriber + subscribersLock sync.Mutex + syncPoints syncpoints.SyncPoints + blockHeight int64 } // Init implements Engine. 
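With RMTPreparedTransaction added above, prepared transactions ride the same persistent reliable-message queue as state distributions: the sender records a components.ReliableMessage row, and the wire payload is rebuilt from its Metadata at send time (the same pattern buildStateDistributionMsg uses for states later in this patch). A minimal sketch of queueing one follows; the SendReliable entry point on TransportManager is an assumption, not shown in this patch, while the ReliableMessage fields are as used in these diffs:

// Sketch only: SendReliable is an assumed API; ReliableMessage fields are as used in these diffs.
func queuePreparedTxDistribution(ctx context.Context, tm components.TransportManager,
	dbTX *gorm.DB, node string, pt *components.PreparedTransactionWithRefs) error {
	return tm.SendReliable(ctx, dbTX, &components.ReliableMessage{
		ID:          uuid.New(),                               // also the correlation ID for the eventual ack/nack
		MessageType: components.RMTPreparedTransaction.Enum(), // new type introduced in this patch
		Node:        node,                                     // remote node that must persist the prepared TX
		Metadata:    tktypes.JSONString(pt),                   // wire payload is rebuilt from this at send time
	})
}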
@@ -79,15 +77,7 @@ func (p *privateTxManager) PostInit(c components.AllComponents) error { p.components = c p.nodeName = p.components.TransportManager().LocalNodeName() p.syncPoints = syncpoints.NewSyncPoints(p.ctx, &p.config.Writer, c.Persistence(), c.TxManager(), c.PublicTxManager()) - p.preparedTransactionDistributer = preparedtxdistribution.NewPreparedTransactionDistributer( - p.ctx, - p.nodeName, - p.components.TransportManager(), - p.components.TxManager(), - p.components.Persistence(), - &p.config.PreparedTransactionDistributer) - - return p.preparedTransactionDistributer.Start(p.ctx) + return nil } func (p *privateTxManager) Start() error { @@ -96,8 +86,6 @@ func (p *privateTxManager) Start() error { } func (p *privateTxManager) Stop() { - p.stateDistributer.Stop(p.ctx) - } func NewPrivateTransactionMgr(ctx context.Context, config *pldconf.PrivateTxManagerConfig) components.PrivateTxManager { diff --git a/core/go/internal/privatetxnmgr/sequencer.go b/core/go/internal/privatetxnmgr/sequencer.go index 6cf7c386f..49f22a4d5 100644 --- a/core/go/internal/privatetxnmgr/sequencer.go +++ b/core/go/internal/privatetxnmgr/sequencer.go @@ -27,7 +27,6 @@ import ( "github.com/kaleido-io/paladin/config/pkg/pldconf" "github.com/kaleido-io/paladin/core/internal/components" "github.com/kaleido-io/paladin/core/internal/msgs" - "github.com/kaleido-io/paladin/core/internal/preparedtxdistribution" "github.com/kaleido-io/paladin/core/internal/privatetxnmgr/ptmgrtypes" "github.com/kaleido-io/paladin/core/internal/privatetxnmgr/syncpoints" pbEngine "github.com/kaleido-io/paladin/core/pkg/proto/engine" @@ -108,25 +107,24 @@ type Sequencer struct { pendingTransactionEvents chan ptmgrtypes.PrivateTransactionEvent - contractAddress tktypes.EthAddress // the contract address managed by the current sequencer - defaultSigner string - nodeName string - domainAPI components.DomainSmartContract - coordinatorDomainContext components.DomainContext - delegateDomainContext components.DomainContext - components components.AllComponents - endorsementGatherer ptmgrtypes.EndorsementGatherer - publisher ptmgrtypes.Publisher - identityResolver components.IdentityResolver - syncPoints syncpoints.SyncPoints - preparedTransactionDistributer preparedtxdistribution.PreparedTransactionDistributer - transportWriter ptmgrtypes.TransportWriter - graph Graph - requestTimeout time.Duration - coordinatorSelector ptmgrtypes.CoordinatorSelector - newBlockEvents chan int64 - assembleCoordinator ptmgrtypes.AssembleCoordinator - environment *sequencerEnvironment + contractAddress tktypes.EthAddress // the contract address managed by the current sequencer + defaultSigner string + nodeName string + domainAPI components.DomainSmartContract + coordinatorDomainContext components.DomainContext + delegateDomainContext components.DomainContext + components components.AllComponents + endorsementGatherer ptmgrtypes.EndorsementGatherer + publisher ptmgrtypes.Publisher + identityResolver components.IdentityResolver + syncPoints syncpoints.SyncPoints + transportWriter ptmgrtypes.TransportWriter + graph Graph + requestTimeout time.Duration + coordinatorSelector ptmgrtypes.CoordinatorSelector + newBlockEvents chan int64 + assembleCoordinator ptmgrtypes.AssembleCoordinator + environment *sequencerEnvironment } func NewSequencer( @@ -141,7 +139,6 @@ func NewSequencer( publisher ptmgrtypes.Publisher, syncPoints syncpoints.SyncPoints, identityResolver components.IdentityResolver, - preparedTransactionDistributer 
preparedtxdistribution.PreparedTransactionDistributer, transportWriter ptmgrtypes.TransportWriter, requestTimeout time.Duration, blockHeight int64, diff --git a/core/go/internal/privatetxnmgr/sequencer_dispatch.go b/core/go/internal/privatetxnmgr/sequencer_dispatch.go index b9d8b6b43..cdd0f993e 100644 --- a/core/go/internal/privatetxnmgr/sequencer_dispatch.go +++ b/core/go/internal/privatetxnmgr/sequencer_dispatch.go @@ -17,14 +17,11 @@ package privatetxnmgr import ( "context" - "encoding/json" "fmt" - "github.com/google/uuid" "github.com/hyperledger/firefly-common/pkg/i18n" "github.com/kaleido-io/paladin/core/internal/components" "github.com/kaleido-io/paladin/core/internal/msgs" - "github.com/kaleido-io/paladin/core/internal/preparedtxdistribution" "github.com/kaleido-io/paladin/core/internal/privatetxnmgr/ptmgrtypes" "github.com/kaleido-io/paladin/core/internal/privatetxnmgr/syncpoints" @@ -47,7 +44,7 @@ func (s *Sequencer) DispatchTransactions(ctx context.Context, dispatchableTransa stateDistributions := make([]*components.StateDistributionWithData, 0) localStateDistributions := make([]*components.StateDistributionWithData, 0) - preparedTxnDistributions := make([]*preparedtxdistribution.PreparedTxnDistribution, 0) + preparedTxnDistributions := make([]*components.PreparedTransactionWithRefs, 0) for signingAddress, transactionFlows := range dispatchableTransactions { log.L(ctx).Debugf("DispatchTransactions: %d transactions for signingAddress %s", len(transactionFlows), signingAddress) @@ -90,20 +87,7 @@ func (s *Sequencer) DispatchTransactions(ctx context.Context, dispatchableTransa log.L(ctx).Infof("Result of transaction %s is a prepared transaction public=%t private=%t", preparedTransaction.ID, hasPublicTransaction, hasPrivateTransaction) preparedTransactionWithRefs := mapPreparedTransaction(preparedTransaction) dispatchBatch.PreparedTransactions = append(dispatchBatch.PreparedTransactions, preparedTransactionWithRefs) - preparedTransactionJSON, err := json.Marshal(preparedTransactionWithRefs) - if err != nil { - log.L(ctx).Errorf("Error marshalling prepared transaction: %s", err) - // TODO: this is just an error situation for one transaction - this function is a batch function - return err - } - preparedTxnDistributions = append(preparedTxnDistributions, &preparedtxdistribution.PreparedTxnDistribution{ - ID: uuid.New().String(), - PreparedTxnID: preparedTransactionWithRefs.ID.String(), - IdentityLocator: preparedTransactionWithRefs.Sender, - Domain: preparedTransactionWithRefs.Domain, - ContractAddress: preparedTransactionWithRefs.To.String(), - PreparedTransactionJSON: preparedTransactionJSON, - }) + preparedTxnDistributions = append(preparedTxnDistributions, preparedTransactionWithRefs) default: err = i18n.NewError(ctx, msgs.MsgPrivateTxMgrInvalidPrepareOutcome, preparedTransaction.ID, preparedTransaction.Intent, hasPublicTransaction, hasPrivateTransaction) @@ -179,7 +163,7 @@ func (s *Sequencer) DispatchTransactions(ctx context.Context, dispatchableTransa // Determine if there are any local nullifiers that need to be built and put into the domain context // before we persist the dispatch batch - localNullifiers, err := s.stateDistributer.BuildNullifiers(ctx, localStateDistributions) + localNullifiers, err := s.privateTxManager.BuildNullifiers(ctx, localStateDistributions) if err == nil && len(localNullifiers) > 0 { err = dCtx.UpsertNullifiers(localNullifiers...) 
} @@ -201,7 +185,7 @@ func (s *Sequencer) DispatchTransactions(ctx context.Context, dispatchableTransa s.publisher.PublishTransactionPreparedEvent(ctx, preparedTransaction.ID.String()) } //now that the DB write has been persisted, we can trigger the in-memory distribution of the prepared transactions and states - s.stateDistributer.DistributeStates(ctx, stateDistributions) + s.todo.DistributeStates(ctx, stateDistributions) s.preparedTransactionDistributer.DistributePreparedTransactions(ctx, preparedTxnDistributions) diff --git a/core/go/internal/privatetxnmgr/sequencer_test.go b/core/go/internal/privatetxnmgr/sequencer_test.go index e3ed7df23..bc9ae7d54 100644 --- a/core/go/internal/privatetxnmgr/sequencer_test.go +++ b/core/go/internal/privatetxnmgr/sequencer_test.go @@ -26,7 +26,6 @@ import ( "github.com/kaleido-io/paladin/core/internal/components" "github.com/kaleido-io/paladin/core/internal/privatetxnmgr/syncpoints" "github.com/kaleido-io/paladin/core/mocks/componentmocks" - "github.com/kaleido-io/paladin/core/mocks/preparedtxdistributionmocks" "github.com/kaleido-io/paladin/core/mocks/privatetxnmgrmocks" "github.com/kaleido-io/paladin/core/pkg/persistence" @@ -39,22 +38,21 @@ import ( ) type sequencerDepencyMocks struct { - allComponents *componentmocks.AllComponents - privateTxManager *componentmocks.PrivateTxManager - domainSmartContract *componentmocks.DomainSmartContract - domainContext *componentmocks.DomainContext - domainMgr *componentmocks.DomainManager - domain *componentmocks.Domain - transportManager *componentmocks.TransportManager - stateStore *componentmocks.StateManager - keyManager *componentmocks.KeyManager - endorsementGatherer *privatetxnmgrmocks.EndorsementGatherer - publisher *privatetxnmgrmocks.Publisher - identityResolver *componentmocks.IdentityResolver - preparedTransactionDistributer *preparedtxdistributionmocks.PreparedTransactionDistributer - txManager *componentmocks.TXManager - pubTxManager *componentmocks.PublicTxManager - transportWriter *privatetxnmgrmocks.TransportWriter + allComponents *componentmocks.AllComponents + privateTxManager *componentmocks.PrivateTxManager + domainSmartContract *componentmocks.DomainSmartContract + domainContext *componentmocks.DomainContext + domainMgr *componentmocks.DomainManager + domain *componentmocks.Domain + transportManager *componentmocks.TransportManager + stateStore *componentmocks.StateManager + keyManager *componentmocks.KeyManager + endorsementGatherer *privatetxnmgrmocks.EndorsementGatherer + publisher *privatetxnmgrmocks.Publisher + identityResolver *componentmocks.IdentityResolver + txManager *componentmocks.TXManager + pubTxManager *componentmocks.PublicTxManager + transportWriter *privatetxnmgrmocks.TransportWriter } func newSequencerForTesting(t *testing.T, ctx context.Context, domainAddress *tktypes.EthAddress) (*Sequencer, *sequencerDepencyMocks, func()) { @@ -63,22 +61,21 @@ func newSequencerForTesting(t *testing.T, ctx context.Context, domainAddress *tk } mocks := &sequencerDepencyMocks{ - allComponents: componentmocks.NewAllComponents(t), - privateTxManager: componentmocks.NewPrivateTxManager(t), - domainSmartContract: componentmocks.NewDomainSmartContract(t), - domainContext: componentmocks.NewDomainContext(t), - domainMgr: componentmocks.NewDomainManager(t), - domain: componentmocks.NewDomain(t), - transportManager: componentmocks.NewTransportManager(t), - stateStore: componentmocks.NewStateManager(t), - keyManager: componentmocks.NewKeyManager(t), - endorsementGatherer: 
privatetxnmgrmocks.NewEndorsementGatherer(t), - publisher: privatetxnmgrmocks.NewPublisher(t), - identityResolver: componentmocks.NewIdentityResolver(t), - preparedTransactionDistributer: preparedtxdistributionmocks.NewPreparedTransactionDistributer(t), - txManager: componentmocks.NewTXManager(t), - pubTxManager: componentmocks.NewPublicTxManager(t), - transportWriter: privatetxnmgrmocks.NewTransportWriter(t), + allComponents: componentmocks.NewAllComponents(t), + privateTxManager: componentmocks.NewPrivateTxManager(t), + domainSmartContract: componentmocks.NewDomainSmartContract(t), + domainContext: componentmocks.NewDomainContext(t), + domainMgr: componentmocks.NewDomainManager(t), + domain: componentmocks.NewDomain(t), + transportManager: componentmocks.NewTransportManager(t), + stateStore: componentmocks.NewStateManager(t), + keyManager: componentmocks.NewKeyManager(t), + endorsementGatherer: privatetxnmgrmocks.NewEndorsementGatherer(t), + publisher: privatetxnmgrmocks.NewPublisher(t), + identityResolver: componentmocks.NewIdentityResolver(t), + txManager: componentmocks.NewTXManager(t), + pubTxManager: componentmocks.NewPublicTxManager(t), + transportWriter: privatetxnmgrmocks.NewTransportWriter(t), } mocks.allComponents.On("StateManager").Return(mocks.stateStore).Maybe() mocks.allComponents.On("DomainManager").Return(mocks.domainMgr).Maybe() diff --git a/core/go/internal/privatetxnmgr/syncpoints/dispatch.go b/core/go/internal/privatetxnmgr/syncpoints/dispatch.go index 8ae4864e6..9aeaab96d 100644 --- a/core/go/internal/privatetxnmgr/syncpoints/dispatch.go +++ b/core/go/internal/privatetxnmgr/syncpoints/dispatch.go @@ -21,7 +21,6 @@ import ( "github.com/google/uuid" "github.com/kaleido-io/paladin/core/internal/components" - "github.com/kaleido-io/paladin/core/internal/preparedtxdistribution" "github.com/kaleido-io/paladin/toolkit/pkg/log" "github.com/kaleido-io/paladin/toolkit/pkg/tktypes" "gorm.io/gorm" @@ -32,7 +31,7 @@ type dispatchOperation struct { publicDispatches []*PublicDispatch privateDispatches []*components.ValidatedTransaction preparedTransactions []*components.PreparedTransactionWithRefs - preparedTxnDistributions []*preparedtxdistribution.PreparedTxnDistributionPersisted + preparedTxnDistributions []*components.PreparedTransactionWithRefs } type DispatchPersisted struct { @@ -58,11 +57,14 @@ type DispatchBatch struct { // PersistDispatches persists the dispatches to the database and coordinates with the public transaction manager // to submit public transactions. 
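The hunk below swaps the old PreparedTxnDistributionPersisted rows for generic components.ReliableMessage rows, but as committed it is visibly still work in progress: the hard-coded "node2", the undefined sds[i], and the stale PreparedTxnID/IdentityLocator fields are placeholders. A hedged sketch of the mapping it appears to be working towards, assuming the target node is derived from the prepared transaction's From locator:

// Sketch only: the placeholders in the hunk below replaced with assumed derivations.
for _, ptd := range preparedTxnDistributions {
	node, err := tktypes.PrivateIdentityLocator(ptd.Transaction.From).Node(ctx, false)
	if err != nil {
		return err
	}
	preparedTxnDistributionsPersisted = append(preparedTxnDistributionsPersisted, &components.ReliableMessage{
		ID:          uuid.New(),
		MessageType: components.RMTPreparedTransaction.Enum(),
		Node:        node,                    // the remote node to deliver to, not a hard-coded name
		Metadata:    tktypes.JSONString(ptd), // enough to rebuild the wire payload at send time
	})
}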
-func (s *syncPoints) PersistDispatchBatch(dCtx components.DomainContext, contractAddress tktypes.EthAddress, dispatchBatch *DispatchBatch, stateDistributions []*components.StateDistributionWithData, preparedTxnDistributions []*preparedtxdistribution.PreparedTxnDistribution) error { +func (s *syncPoints) PersistDispatchBatch(dCtx components.DomainContext, contractAddress tktypes.EthAddress, dispatchBatch *DispatchBatch, stateDistributions []*components.StateDistributionWithData, preparedTxnDistributions []*components.PreparedTransactionWithRefs) error { - preparedTxnDistributionsPersisted := make([]*preparedtxdistribution.PreparedTxnDistributionPersisted, 0, len(dispatchBatch.PreparedTransactions)) + preparedTxnDistributionsPersisted := make([]*components.ReliableMessage, 0, len(dispatchBatch.PreparedTransactions)) for _, preparedTxnDistribution := range preparedTxnDistributions { - preparedTxnDistributionsPersisted = append(preparedTxnDistributionsPersisted, &preparedtxdistribution.PreparedTxnDistributionPersisted{ + preparedTxnDistributionsPersisted = append(preparedTxnDistributionsPersisted, &components.ReliableMessage{ + MessageType: components.RMTPreparedTransaction.Enum(), + Node: "node2", + Metadata: tktypes.JSONString(sds[i]), ID: preparedTxnDistribution.ID, PreparedTxnID: preparedTxnDistribution.PreparedTxnID, IdentityLocator: preparedTxnDistribution.IdentityLocator, diff --git a/core/go/internal/privatetxnmgr/syncpoints/syncpoints.go b/core/go/internal/privatetxnmgr/syncpoints/syncpoints.go index fa508c8b0..c01803722 100644 --- a/core/go/internal/privatetxnmgr/syncpoints/syncpoints.go +++ b/core/go/internal/privatetxnmgr/syncpoints/syncpoints.go @@ -24,7 +24,6 @@ import ( "github.com/kaleido-io/paladin/config/pkg/pldconf" "github.com/kaleido-io/paladin/core/internal/components" "github.com/kaleido-io/paladin/core/internal/flushwriter" - "github.com/kaleido-io/paladin/core/internal/preparedtxdistribution" "github.com/kaleido-io/paladin/core/pkg/persistence" "github.com/kaleido-io/paladin/toolkit/pkg/tktypes" @@ -51,7 +50,7 @@ type SyncPoints interface { // to the PrivateTxnManager's persistence store in the same database transaction // Although the actual persistence is offloaded to the flushwriter, this method is synchronous and will block until the // dispatch sequence is written to the database - PersistDispatchBatch(dCtx components.DomainContext, contractAddress tktypes.EthAddress, dispatchBatch *DispatchBatch, stateDistributions []*components.StateDistributionWithData, preparedTxnDistributions []*preparedtxdistribution.PreparedTxnDistribution) error + PersistDispatchBatch(dCtx components.DomainContext, contractAddress tktypes.EthAddress, dispatchBatch *DispatchBatch, stateDistributions []*components.StateDistributionWithData, preparedTxnDistributions []*components.PreparedTransactionWithRefs) error // Deploy is a special case of dispatch batch, where there are no private states, so no domain context is required PersistDeployDispatchBatch(ctx context.Context, dispatchBatch *DispatchBatch) error diff --git a/core/go/internal/transportmgr/manager.go b/core/go/internal/transportmgr/manager.go index 553e40435..914a141af 100644 --- a/core/go/internal/transportmgr/manager.go +++ b/core/go/internal/transportmgr/manager.go @@ -50,6 +50,7 @@ type transportManager struct { registryManager components.RegistryManager stateManager components.StateManager domainManager components.DomainManager + txManager components.TXManager privateTxManager components.PrivateTxManager identityResolver 
components.IdentityResolver persistence persistence.Persistence @@ -111,6 +112,7 @@ func (tm *transportManager) PostInit(c components.AllComponents) error { tm.registryManager = c.RegistryManager() tm.stateManager = c.StateManager() tm.domainManager = c.DomainManager() + tm.txManager = c.TxManager() tm.privateTxManager = c.PrivateTxManager() tm.identityResolver = c.IdentityResolver() tm.persistence = c.Persistence() diff --git a/core/go/internal/transportmgr/manager_test.go b/core/go/internal/transportmgr/manager_test.go index 61bf98e20..44b15ec09 100644 --- a/core/go/internal/transportmgr/manager_test.go +++ b/core/go/internal/transportmgr/manager_test.go @@ -41,6 +41,7 @@ type mockComponents struct { registryManager *componentmocks.RegistryManager stateManager *componentmocks.StateManager domainManager *componentmocks.DomainManager + txManager *componentmocks.TXManager privateTxManager *componentmocks.PrivateTxManager identityResolver *componentmocks.IdentityResolver } @@ -50,6 +51,7 @@ func newMockComponents(t *testing.T, realDB bool) *mockComponents { mc.registryManager = componentmocks.NewRegistryManager(t) mc.stateManager = componentmocks.NewStateManager(t) mc.domainManager = componentmocks.NewDomainManager(t) + mc.txManager = componentmocks.NewTXManager(t) mc.privateTxManager = componentmocks.NewPrivateTxManager(t) mc.identityResolver = componentmocks.NewIdentityResolver(t) if realDB { @@ -67,6 +69,7 @@ func newMockComponents(t *testing.T, realDB bool) *mockComponents { mc.c.On("RegistryManager").Return(mc.registryManager).Maybe() mc.c.On("StateManager").Return(mc.stateManager).Maybe() mc.c.On("DomainManager").Return(mc.domainManager).Maybe() + mc.c.On("TxManager").Return(mc.txManager).Maybe() mc.c.On("PrivateTxManager").Return(mc.privateTxManager).Maybe() mc.c.On("IdentityResolver").Return(mc.identityResolver).Maybe() return mc diff --git a/core/go/internal/transportmgr/peer.go b/core/go/internal/transportmgr/peer.go index 6480b99f8..c15bfab40 100644 --- a/core/go/internal/transportmgr/peer.go +++ b/core/go/internal/transportmgr/peer.go @@ -353,34 +353,6 @@ func (p *peer) reliableMessageScan(checkNew bool) error { return nil } -func (p *peer) buildStateDistributionMsg(rm *components.ReliableMessage) (*prototk.PaladinMsg, error, error) { - - // Validate the message first (not retryable) - sd, parsed, parseErr := parseStateDistribution(p.ctx, rm.ID, rm.Metadata) - if parseErr != nil { - return nil, parseErr, nil - } - - // Get the state - distinguishing between not found, vs. 
a retryable error - state, err := p.tm.stateManager.GetState(p.ctx, p.tm.persistence.DB(), sd.Domain, parsed.ContractAddress, parsed.ID, false, false) - if err != nil { - return nil, nil, err - } - if state == nil { - return nil, - i18n.NewError(p.ctx, msgs.MsgTransportStateNotAvailableLocally, sd.Domain, parsed.ContractAddress, parsed.ID), - nil - } - sd.StateData = state.Data - - return &prototk.PaladinMsg{ - MessageId: rm.ID.String(), - Component: prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER, - MessageType: RMHMessageTypeStateDistribution, - Payload: tktypes.JSONString(sd), - }, nil, nil -} - func (p *peer) processReliableMsgPage(page []*components.ReliableMessage) (err error) { type paladinMsgWithSeq struct { @@ -405,7 +377,7 @@ func (p *peer) processReliableMsgPage(page []*components.ReliableMessage) (err e var errorAck error switch rm.MessageType.V() { case components.RMTState: - msg, errorAck, err = p.buildStateDistributionMsg(rm) + msg, errorAck, err = p.tm.buildStateDistributionMsg(p.ctx, p.tm.persistence.DB(), rm) case components.RMTReceipt: // TODO: Implement for receipt distribution fallthrough diff --git a/core/go/internal/transportmgr/peer_test.go b/core/go/internal/transportmgr/peer_test.go index a37bcb799..093cf63c1 100644 --- a/core/go/internal/transportmgr/peer_test.go +++ b/core/go/internal/transportmgr/peer_test.go @@ -206,7 +206,7 @@ func TestReliableMessageSendSendQuiesceRealDB(t *testing.T) { for _, msgID := range msgIDs { rmr, err := tp.t.ReceiveMessage(ctx, &prototk.ReceiveMessageRequest{ FromNode: "node2", - Message: buildAck(msgID, nil), + Message: buildAck(msgID, ""), }) require.NoError(t, err) assert.NotNil(t, rmr) diff --git a/core/go/internal/transportmgr/reliable_msg_handler.go b/core/go/internal/transportmgr/reliable_msg_handler.go index 51a57aaee..937a70c6a 100644 --- a/core/go/internal/transportmgr/reliable_msg_handler.go +++ b/core/go/internal/transportmgr/reliable_msg_handler.go @@ -18,7 +18,6 @@ package transportmgr import ( "context" "encoding/json" - "errors" "github.com/google/uuid" "github.com/hyperledger/firefly-common/pkg/i18n" @@ -32,10 +31,11 @@ import ( ) const ( - RMHMessageTypeAck = "ack" - RMHMessageTypeNack = "nack" - RMHMessageTypeStateDistribution = string(components.RMTState) - RMHMessageTypeStateReceipt = string(components.RMTReceipt) + RMHMessageTypeAck = "ack" + RMHMessageTypeNack = "nack" + RMHMessageTypeStateDistribution = string(components.RMTState) + RMHMessageTypeReceipt = string(components.RMTReceipt) + RMHMessageTypePreparedTransaction = string(components.RMTPreparedTransaction) ) type reliableMsgOp struct { @@ -66,6 +66,8 @@ func (tm *transportManager) handleReliableMsgBatch(ctx context.Context, dbTX *go var acksToWrite []*components.ReliableMessageAck var acksToSend []*ackInfo statesToAdd := make(map[string][]*stateAndAck) + var preparedTxnToAdd []*components.PreparedTransactionWithRefs + var txReceiptsToFinalize []*components.ReceiptInput // The batch can contain different kinds of message that all need persistence activity for _, v := range values { @@ -83,6 +85,30 @@ func (tm *transportManager) handleReliableMsgBatch(ctx context.Context, dbTX *go ack: &ackInfo{node: v.p.Name, id: v.msgID}, }) } + case RMHMessageTypePreparedTransaction: + var pt components.PreparedTransactionWithRefs + err := json.Unmarshal(v.msg.Payload, &pt) + if err != nil { + acksToSend = append(acksToSend, + &ackInfo{node: v.p.Name, id: v.msgID, Error: err.Error()}, // reject the message permanently + ) + } else { + // Build the ack now, as we'll fail 
the whole TX and not send any acks if the write fails + acksToSend = append(acksToSend, &ackInfo{node: v.p.Name, id: v.msgID}) + preparedTxnToAdd = append(preparedTxnToAdd, &pt) + } + case RMHMessageTypeReceipt: + var receipt components.ReceiptInput + err := json.Unmarshal(v.msg.Payload, &receipt) + if err != nil { + acksToSend = append(acksToSend, + &ackInfo{node: v.p.Name, id: v.msgID, Error: err.Error()}, // reject the message permanently + ) + } else { + // Build the ack now, as we'll fail the whole TX and not send any acks if the write fails + acksToSend = append(acksToSend, &ackInfo{node: v.p.Name, id: v.msgID}) + txReceiptsToFinalize = append(txReceiptsToFinalize, &receipt) + } case RMHMessageTypeAck, RMHMessageTypeNack: ackNackToWrite := tm.parseReceivedAckNack(ctx, v.msg) if ackNackToWrite != nil { @@ -150,25 +176,42 @@ func (tm *transportManager) handleReliableMsgBatch(ctx context.Context, dbTX *go } } + // Insert the transaction receipts + if len(txReceiptsToFinalize) > 0 { + if err := tm.txManager.FinalizeTransactions(ctx, dbTX, txReceiptsToFinalize); err != nil { + return nil, nil, err + } + } + + // Insert the prepared transactions, capturing any post-commit + var writePreparedTxPostCommit func() + if len(preparedTxnToAdd) > 0 { + var err error + if writePreparedTxPostCommit, err = tm.txManager.WritePreparedTransactions(ctx, dbTX, preparedTxnToAdd); err != nil { + return nil, nil, err + } + } + // We use a post-commit handler to send back any acks to the other side that are required return func(err error) { if err == nil { // We've committed the database work ok - send the acks/nacks to the other side for _, a := range acksToSend { - _ = tm.queueFireAndForget(ctx, a.node, buildAck(a.id, errors.New(a.Error))) + _ = tm.queueFireAndForget(ctx, a.node, buildAck(a.id, a.Error)) + } + if writePreparedTxPostCommit != nil { + writePreparedTxPostCommit() } } }, make([]flushwriter.Result[*noResult], len(values)), nil } -func buildAck(msgID uuid.UUID, err error) *prototk.PaladinMsg { +func buildAck(msgID uuid.UUID, errString string) *prototk.PaladinMsg { cid := msgID.String() msgType := RMHMessageTypeAck - var errString string - if err != nil { + if errString != "" { msgType = RMHMessageTypeNack - errString = err.Error() } return &prototk.PaladinMsg{ MessageId: uuid.NewString(), @@ -206,6 +249,34 @@ func (tm *transportManager) parseReceivedAckNack(ctx context.Context, msg *proto return ackNackToWrite } +func (tm *transportManager) buildStateDistributionMsg(ctx context.Context, dbTX *gorm.DB, rm *components.ReliableMessage) (*prototk.PaladinMsg, error, error) { + + // Validate the message first (not retryable) + sd, parsed, parseErr := parseStateDistribution(ctx, rm.ID, rm.Metadata) + if parseErr != nil { + return nil, parseErr, nil + } + + // Get the state - distinguishing between not found, vs. 
a retryable error + state, err := tm.stateManager.GetState(ctx, dbTX, sd.Domain, parsed.ContractAddress, parsed.ID, false, false) + if err != nil { + return nil, nil, err + } + if state == nil { + return nil, + i18n.NewError(ctx, msgs.MsgTransportStateNotAvailableLocally, sd.Domain, parsed.ContractAddress, parsed.ID), + nil + } + sd.StateData = state.Data + + return &prototk.PaladinMsg{ + MessageId: rm.ID.String(), + Component: prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER, + MessageType: RMHMessageTypeStateDistribution, + Payload: tktypes.JSONString(sd), + }, nil, nil +} + func parseStateDistribution(ctx context.Context, msgID uuid.UUID, data []byte) (sd *components.StateDistributionWithData, parsed *components.StateUpsertOutsideContext, err error) { parsed = &components.StateUpsertOutsideContext{} var contractAddr *tktypes.EthAddress diff --git a/core/go/internal/transportmgr/reliable_msg_handler_test.go b/core/go/internal/transportmgr/reliable_msg_handler_test.go index d3e272323..c27644042 100644 --- a/core/go/internal/transportmgr/reliable_msg_handler_test.go +++ b/core/go/internal/transportmgr/reliable_msg_handler_test.go @@ -26,6 +26,7 @@ import ( "github.com/kaleido-io/paladin/config/pkg/confutil" "github.com/kaleido-io/paladin/config/pkg/pldconf" "github.com/kaleido-io/paladin/core/internal/components" + "github.com/kaleido-io/paladin/toolkit/pkg/pldapi" "github.com/kaleido-io/paladin/toolkit/pkg/prototk" "github.com/kaleido-io/paladin/toolkit/pkg/tktypes" "github.com/stretchr/testify/assert" @@ -33,6 +34,33 @@ import ( "github.com/stretchr/testify/require" ) +func setupAckOrNackCheck(t *testing.T, tp *testPlugin, msgID uuid.UUID, expectedErr string) func() { + mockActivateDeactivateOk(tp) + sentMessages := make(chan *prototk.PaladinMsg) + tp.Functions.SendMessage = func(ctx context.Context, req *prototk.SendMessageRequest) (*prototk.SendMessageResponse, error) { + sent := req.Message + sentMessages <- sent + return nil, nil + } + + return func() { + expectedAckOrNack := <-sentMessages + require.Equal(t, msgID.String(), *expectedAckOrNack.CorrelationId) + require.Equal(t, prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER, expectedAckOrNack.Component) + var ai ackInfo + err := json.Unmarshal(expectedAckOrNack.Payload, &ai) + require.NoError(t, err) + if expectedErr == "" { + require.Equal(t, RMHMessageTypeAck, expectedAckOrNack.MessageType) + require.Empty(t, ai.Error) + } else { + require.Equal(t, RMHMessageTypeNack, expectedAckOrNack.MessageType) + require.Regexp(t, expectedErr, ai.Error) + } + + } +} + func TestReceiveMessageStateSendAckRealDB(t *testing.T) { ctx, _, tp, done := newTestTransport(t, true, mockGoodTransport, @@ -60,13 +88,7 @@ func TestReceiveMessageStateSendAckRealDB(t *testing.T) { }), } - mockActivateDeactivateOk(tp) - sentMessages := make(chan *prototk.PaladinMsg) - tp.Functions.SendMessage = func(ctx context.Context, req *prototk.SendMessageRequest) (*prototk.SendMessageResponse, error) { - sent := req.Message - sentMessages <- sent - return nil, nil - } + ackNackCheck := setupAckOrNackCheck(t, tp, msgID, "") // Receive the message that needs the ack rmr, err := tp.t.ReceiveMessage(ctx, &prototk.ReceiveMessageRequest{ @@ -76,9 +98,7 @@ func TestReceiveMessageStateSendAckRealDB(t *testing.T) { require.NoError(t, err) assert.NotNil(t, rmr) - ack := <-sentMessages - require.JSONEq(t, string(ack.Payload), `{}`) - require.Equal(t, msgID.String(), *ack.CorrelationId) + ackNackCheck() } @@ -110,13 +130,7 @@ func TestHandleStateDistroBadState(t *testing.T) { }), } - 
mockActivateDeactivateOk(tp) - sentMessages := make(chan *prototk.PaladinMsg) - tp.Functions.SendMessage = func(ctx context.Context, req *prototk.SendMessageRequest) (*prototk.SendMessageResponse, error) { - sent := req.Message - sentMessages <- sent - return nil, nil - } + ackNackCheck := setupAckOrNackCheck(t, tp, msgID, "bad data") p, err := tm.getPeer(ctx, "node2", false) require.NoError(t, err) @@ -130,14 +144,7 @@ func TestHandleStateDistroBadState(t *testing.T) { // Run the postCommit and check we get the nack postCommit(nil) - expectedNack := <-sentMessages - require.Equal(t, msgID.String(), *expectedNack.CorrelationId) - require.Equal(t, prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER, expectedNack.Component) - require.Equal(t, RMHMessageTypeNack, expectedNack.MessageType) - var ai ackInfo - err = json.Unmarshal(expectedNack.Payload, &ai) - require.NoError(t, err) - require.Regexp(t, "bad data", ai.Error) + ackNackCheck() } func TestHandleStateDistroBadMsg(t *testing.T) { @@ -164,13 +171,7 @@ func TestHandleStateDistroBadMsg(t *testing.T) { }), } - mockActivateDeactivateOk(tp) - sentMessages := make(chan *prototk.PaladinMsg) - tp.Functions.SendMessage = func(ctx context.Context, req *prototk.SendMessageRequest) (*prototk.SendMessageResponse, error) { - sent := req.Message - sentMessages <- sent - return nil, nil - } + ackNackCheck := setupAckOrNackCheck(t, tp, msgID, "PD012016") p, err := tm.getPeer(ctx, "node2", false) require.NoError(t, err) @@ -184,14 +185,7 @@ func TestHandleStateDistroBadMsg(t *testing.T) { // Run the postCommit and check we get the nack postCommit(nil) - expectedNack := <-sentMessages - require.Equal(t, msgID.String(), *expectedNack.CorrelationId) - require.Equal(t, prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER, expectedNack.Component) - require.Equal(t, RMHMessageTypeNack, expectedNack.MessageType) - var ai ackInfo - err = json.Unmarshal(expectedNack.Payload, &ai) - require.NoError(t, err) - require.Regexp(t, "PD012016", ai.Error) + ackNackCheck() } func TestHandleStateDistroUnknownMsgType(t *testing.T) { @@ -210,13 +204,7 @@ func TestHandleStateDistroUnknownMsgType(t *testing.T) { Payload: []byte(`{}`), } - mockActivateDeactivateOk(tp) - sentMessages := make(chan *prototk.PaladinMsg) - tp.Functions.SendMessage = func(ctx context.Context, req *prototk.SendMessageRequest) (*prototk.SendMessageResponse, error) { - sent := req.Message - sentMessages <- sent - return nil, nil - } + ackNackCheck := setupAckOrNackCheck(t, tp, msgID, "PD012017") p, err := tm.getPeer(ctx, "node2", false) require.NoError(t, err) @@ -230,14 +218,7 @@ func TestHandleStateDistroUnknownMsgType(t *testing.T) { // Run the postCommit and check we get the nack postCommit(nil) - expectedNack := <-sentMessages - require.Equal(t, msgID.String(), *expectedNack.CorrelationId) - require.Equal(t, prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER, expectedNack.Component) - require.Equal(t, RMHMessageTypeNack, expectedNack.MessageType) - var ai ackInfo - err = json.Unmarshal(expectedNack.Payload, &ai) - require.NoError(t, err) - require.Regexp(t, "PD012017", ai.Error) + ackNackCheck() } func TestHandleAckFailReadMsg(t *testing.T) { @@ -317,3 +298,168 @@ func TestHandleBadAckNoCorrelId(t *testing.T) { require.NoError(t, err) postCommit(nil) } + +func TestHandleReceiptFail(t *testing.T) { + ctx, tm, _, done := newTestTransport(t, false, + func(mc *mockComponents, conf *pldconf.TransportManagerConfig) { + mc.txManager.On("FinalizeTransactions", mock.Anything, mock.Anything, mock.Anything). 
+ Return(fmt.Errorf("pop"))
+ },
+ )
+ defer done()
+
+ msgID := uuid.New()
+ msg := &prototk.PaladinMsg{
+ MessageId: msgID.String(),
+ CorrelationId: confutil.P(uuid.NewString()),
+ Component: prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER,
+ MessageType: RMHMessageTypeReceipt,
+ Payload: tktypes.JSONString(&components.ReceiptInput{
+ Domain: "domain1",
+ ReceiptType: components.RT_Success,
+ }),
+ }
+ p, err := tm.getPeer(ctx, "node2", false)
+ require.NoError(t, err)
+
+ // Handle the batch - will fail to finalize the receipt
+ _, _, err = tm.handleReliableMsgBatch(ctx, tm.persistence.DB(), []*reliableMsgOp{
+ {msgID: msgID, p: p, msg: msg},
+ })
+ require.Regexp(t, "pop", err)
+
+}
+
+func TestHandlePreparedTxFail(t *testing.T) {
+ ctx, tm, _, done := newTestTransport(t, false,
+ func(mc *mockComponents, conf *pldconf.TransportManagerConfig) {
+ mc.txManager.On("WritePreparedTransactions", mock.Anything, mock.Anything, mock.Anything).
+ Return(nil, fmt.Errorf("pop"))
+ },
+ )
+ defer done()
+
+ msgID := uuid.New()
+ msg := &prototk.PaladinMsg{
+ MessageId: msgID.String(),
+ CorrelationId: confutil.P(uuid.NewString()),
+ Component: prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER,
+ MessageType: RMHMessageTypePreparedTransaction,
+ Payload: tktypes.JSONString(&components.PreparedTransactionWithRefs{
+ PreparedTransactionBase: &pldapi.PreparedTransactionBase{
+ ID: uuid.New(),
+ },
+ }),
+ }
+ p, err := tm.getPeer(ctx, "node2", false)
+ require.NoError(t, err)
+
+ _, _, err = tm.handleReliableMsgBatch(ctx, tm.persistence.DB(), []*reliableMsgOp{
+ {msgID: msgID, p: p, msg: msg},
+ })
+ require.Regexp(t, "pop", err)
+
+}
+
+func TestHandleReceiptBadData(t *testing.T) {
+ ctx, tm, tp, done := newTestTransport(t, false,
+ mockGoodTransport,
+ mockEmptyReliableMsgs,
+ )
+ defer done()
+
+ msgID := uuid.New()
+ msg := &prototk.PaladinMsg{
+ MessageId: msgID.String(),
+ CorrelationId: confutil.P(uuid.NewString()),
+ Component: prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER,
+ MessageType: RMHMessageTypeReceipt,
+ Payload: []byte(`!{ bad data`),
+ }
+ p, err := tm.getPeer(ctx, "node2", false)
+ require.NoError(t, err)
+
+ ackNackCheck := setupAckOrNackCheck(t, tp, msgID, "invalid character")
+
+ // Handle the batch - the bad payload is rejected with a nack rather than an error
+ pc, _, err := tm.handleReliableMsgBatch(ctx, tm.persistence.DB(), []*reliableMsgOp{
+ {msgID: msgID, p: p, msg: msg},
+ })
+ require.NoError(t, err)
+
+ pc(nil)
+
+ ackNackCheck()
+}
+
+func TestHandlePreparedTxBadData(t *testing.T) {
+ ctx, tm, tp, done := newTestTransport(t, false,
+ mockGoodTransport,
+ mockEmptyReliableMsgs,
+ )
+ defer done()
+
+ msgID := uuid.New()
+ msg := &prototk.PaladinMsg{
+ MessageId: msgID.String(),
+ CorrelationId: confutil.P(uuid.NewString()),
+ Component: prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER,
+ MessageType: RMHMessageTypePreparedTransaction,
+ Payload: []byte(`!{ bad data`),
+ }
+ p, err := tm.getPeer(ctx, "node2", false)
+ require.NoError(t, err)
+
+ ackNackCheck := setupAckOrNackCheck(t, tp, msgID, "invalid character")
+
+ pc, _, err := tm.handleReliableMsgBatch(ctx, tm.persistence.DB(), []*reliableMsgOp{
+ {msgID: msgID, p: p, msg: msg},
+ })
+ require.NoError(t, err)
+
+ pc(nil)
+
+ ackNackCheck()
+}
+
+func TestHandlePreparedOk(t *testing.T) {
+ persistentTxPCCalled := make(chan struct{})
+ ctx, tm, tp, done := newTestTransport(t, false,
+ mockGoodTransport,
+ mockEmptyReliableMsgs,
+ func(mc *mockComponents, conf *pldconf.TransportManagerConfig) {
+ mc.txManager.On("WritePreparedTransactions", mock.Anything,
mock.Anything, mock.Anything). + Return(func() { + close(persistentTxPCCalled) + }, nil) + }, + ) + defer done() + + msgID := uuid.New() + msg := &prototk.PaladinMsg{ + MessageId: msgID.String(), + CorrelationId: confutil.P(uuid.NewString()), + Component: prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER, + MessageType: RMHMessageTypePreparedTransaction, + Payload: tktypes.JSONString(&components.PreparedTransactionWithRefs{ + PreparedTransactionBase: &pldapi.PreparedTransactionBase{ + ID: uuid.New(), + }, + }), + } + p, err := tm.getPeer(ctx, "node2", false) + require.NoError(t, err) + + ackNackCheck := setupAckOrNackCheck(t, tp, msgID, "") + pc, _, err := tm.handleReliableMsgBatch(ctx, tm.persistence.DB(), []*reliableMsgOp{ + {msgID: msgID, p: p, msg: msg}, + }) + require.NoError(t, err) + + pc(nil) + + <-persistentTxPCCalled + + ackNackCheck() +} diff --git a/core/go/internal/txmgr/prepared_transaction.go b/core/go/internal/txmgr/prepared_transaction.go index 9bc377960..232c57497 100644 --- a/core/go/internal/txmgr/prepared_transaction.go +++ b/core/go/internal/txmgr/prepared_transaction.go @@ -160,27 +160,47 @@ func (tm *txManager) WritePreparedTransactions(ctx context.Context, dbTX *gorm.D } func (tm *txManager) QueryPreparedTransactions(ctx context.Context, dbTX *gorm.DB, jq *query.QueryJSON) ([]*pldapi.PreparedTransaction, error) { - qw := &queryWrapper[preparedTransaction, pldapi.PreparedTransaction]{ + bpts, err := tm.queryPreparedTransactionsBase(ctx, dbTX, jq) + if err != nil { + return nil, err + } + return tm.enrichPreparedTransactionsFull(ctx, dbTX, bpts) +} + +func (tm *txManager) QueryPreparedTransactionsWithRefs(ctx context.Context, dbTX *gorm.DB, jq *query.QueryJSON) ([]*components.PreparedTransactionWithRefs, error) { + bpts, err := tm.queryPreparedTransactionsBase(ctx, dbTX, jq) + if err != nil { + return nil, err + } + return tm.enrichPreparedTransactionsRefs(ctx, dbTX, bpts) +} + +func (tm *txManager) queryPreparedTransactionsBase(ctx context.Context, dbTX *gorm.DB, jq *query.QueryJSON) ([]*pldapi.PreparedTransactionBase, error) { + qw := &queryWrapper[preparedTransaction, pldapi.PreparedTransactionBase]{ p: tm.p, table: "prepared_txns", defaultSort: "-created", filters: preparedTransactionFilters, query: jq, - mapResult: func(pt *preparedTransaction) (*pldapi.PreparedTransaction, error) { - preparedTx := &pldapi.PreparedTransaction{ - PreparedTransactionBase: pldapi.PreparedTransactionBase{ - ID: pt.ID, - Domain: pt.Domain, - To: pt.To, - Metadata: pt.Metadata, - }, + mapResult: func(pt *preparedTransaction) (*pldapi.PreparedTransactionBase, error) { + preparedTx := &pldapi.PreparedTransactionBase{ + ID: pt.ID, + Domain: pt.Domain, + To: pt.To, + Metadata: pt.Metadata, } return preparedTx, json.Unmarshal(pt.Transaction, &preparedTx.Transaction) }, } - preparedTransactions, err := qw.run(ctx, dbTX) - if err != nil { - return nil, err + return qw.run(ctx, dbTX) +} + +func (tm *txManager) enrichPreparedTransactionsFull(ctx context.Context, dbTX *gorm.DB, basePTs []*pldapi.PreparedTransactionBase) ([]*pldapi.PreparedTransaction, error) { + preparedTransactions := make([]*pldapi.PreparedTransaction, len(basePTs)) + for i, bpt := range basePTs { + preparedTransactions[i] = &pldapi.PreparedTransaction{ + PreparedTransactionBase: bpt, + } } if len(preparedTransactions) > 0 { transactionIDs := make([]uuid.UUID, len(preparedTransactions)) @@ -220,6 +240,50 @@ func (tm *txManager) QueryPreparedTransactions(ctx context.Context, dbTX *gorm.D return preparedTransactions, nil } +func (tm 
*txManager) enrichPreparedTransactionsRefs(ctx context.Context, dbTX *gorm.DB, basePTs []*pldapi.PreparedTransactionBase) ([]*components.PreparedTransactionWithRefs, error) { + preparedTransactions := make([]*components.PreparedTransactionWithRefs, len(basePTs)) + for i, bpt := range basePTs { + preparedTransactions[i] = &components.PreparedTransactionWithRefs{ + PreparedTransactionBase: bpt, + } + } + if len(preparedTransactions) > 0 { + transactionIDs := make([]uuid.UUID, len(preparedTransactions)) + for i, pt := range preparedTransactions { + transactionIDs[i] = pt.ID + } + var preparedStates []*preparedTransactionState + err := dbTX.WithContext(ctx). + Where(`"transaction" IN (?)`, transactionIDs). + Order(`"transaction"`). + Order(`"type"`). + Order(`"state_idx"`). + Find(&preparedStates). + Error + if err != nil { + return nil, err + } + for _, ps := range preparedStates { + for _, pt := range preparedTransactions { + if ps.Transaction == pt.ID { + switch ps.Type { + case preparedSpend: + pt.StateRefs.Spent = append(pt.StateRefs.Spent, ps.StateID) + case preparedRead: + pt.StateRefs.Read = append(pt.StateRefs.Read, ps.StateID) + case preparedConfirm: + pt.StateRefs.Confirmed = append(pt.StateRefs.Confirmed, ps.StateID) + case preparedInfo: + pt.StateRefs.Info = append(pt.StateRefs.Info, ps.StateID) + } + } + } + } + + } + return preparedTransactions, nil +} + func (tm *txManager) GetPreparedTransactionByID(ctx context.Context, dbTX *gorm.DB, id uuid.UUID) (*pldapi.PreparedTransaction, error) { pts, err := tm.QueryPreparedTransactions(ctx, dbTX, query.NewQueryBuilder().Limit(1).Equal("id", id).Query()) if len(pts) == 0 || err != nil { @@ -227,3 +291,11 @@ func (tm *txManager) GetPreparedTransactionByID(ctx context.Context, dbTX *gorm. } return pts[0], nil } + +func (tm *txManager) GetPreparedTransactionWithRefsByID(ctx context.Context, dbTX *gorm.DB, id uuid.UUID) (*components.PreparedTransactionWithRefs, error) { + pts, err := tm.QueryPreparedTransactionsWithRefs(ctx, dbTX, query.NewQueryBuilder().Limit(1).Equal("id", id).Query()) + if len(pts) == 0 || err != nil { + return nil, err + } + return pts[0], nil +} diff --git a/core/go/internal/txmgr/prepared_transaction_test.go b/core/go/internal/txmgr/prepared_transaction_test.go index d93343177..534b5a8a6 100644 --- a/core/go/internal/txmgr/prepared_transaction_test.go +++ b/core/go/internal/txmgr/prepared_transaction_test.go @@ -132,7 +132,7 @@ func TestPreparedTransactionRealDB(t *testing.T) { childFnABI := abi.ABI{{Type: abi.Function, Name: "doThing2"}} ptInsert := &components.PreparedTransactionWithRefs{ - PreparedTransactionBase: pldapi.PreparedTransactionBase{ + PreparedTransactionBase: &pldapi.PreparedTransactionBase{ ID: *parentTx.Transaction.ID, Domain: parentTx.Transaction.Domain, To: &contractAddressDomain1, @@ -166,28 +166,30 @@ func TestPreparedTransactionRealDB(t *testing.T) { require.NoError(t, err) postCommit() + expectedPBT := &pldapi.PreparedTransactionBase{ + ID: *parentTx.Transaction.ID, + Domain: "domain1", + To: &contractAddressDomain1, + Transaction: pldapi.TransactionInput{ + TransactionBase: pldapi.TransactionBase{ + From: "me@node1", + IdempotencyKey: "child_txn", + Type: pldapi.TransactionTypePrivate.Enum(), + Domain: "domain2", + To: &contractAddressDomain2, + Function: "doThing2()", // now fully qualified + ABIReference: &storedABI.Hash, // now resolved + Data: tktypes.RawJSON(`{}`), // normalized + }, + }, + Metadata: tktypes.RawJSON(`{"some":"data"}`), + } + // Query it back pt, err := 
txm.GetPreparedTransactionByID(ctx, txm.p.DB(), *parentTx.Transaction.ID) require.NoError(t, err) require.Equal(t, &pldapi.PreparedTransaction{ - PreparedTransactionBase: pldapi.PreparedTransactionBase{ - ID: *parentTx.Transaction.ID, - Domain: "domain1", - To: &contractAddressDomain1, - Transaction: pldapi.TransactionInput{ - TransactionBase: pldapi.TransactionBase{ - From: "me@node1", - IdempotencyKey: "child_txn", - Type: pldapi.TransactionTypePrivate.Enum(), - Domain: "domain2", - To: &contractAddressDomain2, - Function: "doThing2()", // now fully qualified - ABIReference: &storedABI.Hash, // now resolved - Data: tktypes.RawJSON(`{}`), // normalized - }, - }, - Metadata: tktypes.RawJSON(`{"some":"data"}`), - }, + PreparedTransactionBase: expectedPBT, States: pldapi.TransactionStates{ Spent: spent, Read: read, @@ -196,6 +198,27 @@ func TestPreparedTransactionRealDB(t *testing.T) { }, }, pt) + // Query it back + ptr, err := txm.GetPreparedTransactionWithRefsByID(ctx, txm.p.DB(), *parentTx.Transaction.ID) + require.NoError(t, err) + require.Equal(t, &components.PreparedTransactionWithRefs{ + PreparedTransactionBase: expectedPBT, + StateRefs: components.TransactionStateRefs{ + Spent: stateIDs(spent), + Read: stateIDs(read), + Confirmed: stateIDs(confirm), + Info: stateIDs(info), + }, + }, ptr) + +} + +func stateIDs(states []*pldapi.StateBase) []tktypes.HexBytes { + stateIDs := make([]tktypes.HexBytes, len(states)) + for i, s := range states { + stateIDs[i] = s.ID + } + return stateIDs } func TestWritePreparedTransactionsBadTX(t *testing.T) { @@ -203,7 +226,9 @@ func TestWritePreparedTransactionsBadTX(t *testing.T) { ctx, txm, done := newTestTransactionManager(t, false) defer done() - _, err := txm.WritePreparedTransactions(ctx, txm.p.DB(), []*components.PreparedTransactionWithRefs{{}}) + _, err := txm.WritePreparedTransactions(ctx, txm.p.DB(), []*components.PreparedTransactionWithRefs{{ + PreparedTransactionBase: &pldapi.PreparedTransactionBase{}, + }}) assert.Regexp(t, "PD012211", err) } @@ -218,6 +243,16 @@ func TestQueryPreparedTransactionFailNoLimit(t *testing.T) { } +func TestQueryPreparedTransactionWithRefsFailNoLimit(t *testing.T) { + + ctx, txm, done := newTestTransactionManager(t, false) + defer done() + + _, err := txm.QueryPreparedTransactionsWithRefs(ctx, txm.p.DB(), query.NewQueryBuilder().Query()) + assert.Regexp(t, "PD012200", err) + +} + func TestQueryPreparedTransactionFailStates(t *testing.T) { txID := uuid.New() @@ -233,3 +268,33 @@ func TestQueryPreparedTransactionFailStates(t *testing.T) { assert.Regexp(t, "pop", err) } + +func TestQueryPreparedTransactionWithRefsFailStates(t *testing.T) { + + txID := uuid.New() + ctx, txm, done := newTestTransactionManager(t, false, func(conf *pldconf.TxManagerConfig, mc *mockComponents) { + mc.db.ExpectQuery("SELECT.*prepared_txns").WillReturnRows( + sqlmock.NewRows([]string{"id", "transaction"}). 
+ AddRow(txID, `{}`)) + mc.db.ExpectQuery("SELECT.*prepared_txn_states").WillReturnError(fmt.Errorf("pop")) + }) + defer done() + + _, err := txm.QueryPreparedTransactionsWithRefs(ctx, txm.p.DB(), query.NewQueryBuilder().Limit(1).Query()) + assert.Regexp(t, "pop", err) + +} + +func TestGetPreparedTransactionWithRefsByIDNotFound(t *testing.T) { + + ctx, txm, done := newTestTransactionManager(t, false, func(conf *pldconf.TxManagerConfig, mc *mockComponents) { + mc.db.ExpectQuery("SELECT.*prepared_txns").WillReturnRows( + sqlmock.NewRows([]string{"id", "transaction"})) + }) + defer done() + + pt, err := txm.GetPreparedTransactionWithRefsByID(ctx, txm.p.DB(), uuid.New()) + require.NoError(t, err) + assert.Nil(t, pt) + +} diff --git a/toolkit/go/pkg/pldapi/transaction.go b/toolkit/go/pkg/pldapi/transaction.go index 5013ed90f..da259aeff 100644 --- a/toolkit/go/pkg/pldapi/transaction.go +++ b/toolkit/go/pkg/pldapi/transaction.go @@ -172,6 +172,6 @@ type PreparedTransactionBase struct { } type PreparedTransaction struct { - PreparedTransactionBase + *PreparedTransactionBase States TransactionStates `docstruct:"PreparedTransaction" json:"states"` } From 55db3c091bf52389b8d47dbfe2d84e27f69db14b Mon Sep 17 00:00:00 2001 From: Peter Broadhurst Date: Sat, 4 Jan 2025 12:16:19 -0500 Subject: [PATCH 25/41] Add a ReceivedMessage object for component delivery Signed-off-by: Peter Broadhurst --- core/go/internal/components/transportmgr.go | 10 +- .../identityresolver/identityresolver.go | 16 +- .../identityresolver/transport_client.go | 5 +- .../privatetxnmgr/private_txn_mgr_test.go | 2 +- .../transportmgr/reliable_msg_handler.go | 31 ++-- .../transportmgr/reliable_msg_handler_test.go | 169 +++++++----------- core/go/internal/transportmgr/transport.go | 25 ++- .../internal/transportmgr/transport_test.go | 8 +- 8 files changed, 113 insertions(+), 153 deletions(-) diff --git a/core/go/internal/components/transportmgr.go b/core/go/internal/components/transportmgr.go index ef0383481..d0e93d24d 100644 --- a/core/go/internal/components/transportmgr.go +++ b/core/go/internal/components/transportmgr.go @@ -35,6 +35,14 @@ type FireAndForgetMessageSend struct { Payload []byte } +type ReceivedMessage struct { + FromNode string + MessageID uuid.UUID + CorrelationID *uuid.UUID + MessageType string + Payload []byte +} + type ReliableMessageType string const ( @@ -112,7 +120,7 @@ type TransportClient interface { // It delivers messages to this function: // - in whatever order they are received from the transport plugin(s), which is dependent on the _sender_ usually // - with whatever concurrency is performed by the transport plugin(s), which is commonly one per remote node, but that's not assured - HandlePaladinMsg(context.Context, *prototk.PaladinMsg) + HandlePaladinMsg(ctx context.Context, msg *ReceivedMessage) } type TransportManager interface { diff --git a/core/go/internal/identityresolver/identityresolver.go b/core/go/internal/identityresolver/identityresolver.go index 14b440ed7..bd48ac3bd 100644 --- a/core/go/internal/identityresolver/identityresolver.go +++ b/core/go/internal/identityresolver/identityresolver.go @@ -30,6 +30,7 @@ import ( "github.com/kaleido-io/paladin/toolkit/pkg/cache" "github.com/kaleido-io/paladin/toolkit/pkg/log" "github.com/kaleido-io/paladin/toolkit/pkg/pldapi" + "github.com/kaleido-io/paladin/toolkit/pkg/prototk" "github.com/kaleido-io/paladin/toolkit/pkg/tktypes" "google.golang.org/protobuf/proto" ) @@ -172,12 +173,10 @@ func (ir *identityResolver) ResolveVerifierAsync(ctx context.Context, 
lookup str return } - err = ir.transportManager.Send(ctx, &components.TransportMessage{ + err = ir.transportManager.Send(ctx, &components.FireAndForgetMessageSend{ MessageType: "ResolveVerifierRequest", - MessageID: requestID, - Component: IDENTITY_RESOLVER_DESTINATION, + Component: prototk.PaladinMsg_IDENTITY_RESOLVER, Node: remoteNodeId, - ReplyTo: ir.nodeName, Payload: resolveVerifierRequestBytes, }) if err != nil { @@ -280,10 +279,10 @@ func (ir *identityResolver) handleResolveVerifierRequest(ctx context.Context, me } resolveVerifierResponseBytes, err := proto.Marshal(resolveVerifierResponse) if err == nil { - err = ir.transportManager.Send(ctx, &components.TransportMessage{ + err = ir.transportManager.Send(ctx, &components.FireAndForgetMessageSend{ MessageType: "ResolveVerifierResponse", CorrelationID: requestID, - Component: IDENTITY_RESOLVER_DESTINATION, + Component: prototk.PaladinMsg_IDENTITY_RESOLVER, Node: replyTo, Payload: resolveVerifierResponseBytes, }) @@ -308,11 +307,10 @@ func (ir *identityResolver) handleResolveVerifierRequest(ctx context.Context, me } resolveVerifierErrorBytes, err := proto.Marshal(resolveVerifierError) if err == nil { - err = ir.transportManager.Send(ctx, &components.TransportMessage{ + err = ir.transportManager.Send(ctx, &components.FireAndForgetMessageSend{ MessageType: "ResolveVerifierError", CorrelationID: requestID, - ReplyTo: ir.nodeName, - Component: IDENTITY_RESOLVER_DESTINATION, + Component: prototk.PaladinMsg_IDENTITY_RESOLVER, Node: replyTo, Payload: resolveVerifierErrorBytes, }) diff --git a/core/go/internal/identityresolver/transport_client.go b/core/go/internal/identityresolver/transport_client.go index ffc1180d2..fe359f9c4 100644 --- a/core/go/internal/identityresolver/transport_client.go +++ b/core/go/internal/identityresolver/transport_client.go @@ -23,18 +23,17 @@ import ( "github.com/kaleido-io/paladin/toolkit/pkg/log" ) -func (ir *identityResolver) HandlePaladinMsg(ctx context.Context, message *components.TransportMessage) { +func (ir *identityResolver) HandlePaladinMsg(ctx context.Context, message *components.ReceivedMessage) { //TODO this need to become an ultra low latency, non blocking, handover to the event loop thread. // need some thought on how to handle errors, retries, buffering, swapping idle sequencers in and out of memory etc... 
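	// Note: message is now a components.ReceivedMessage, so the sending node is read
	// directly from message.FromNode rather than from a ReplyTo field on the wire.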
//Send the event to the sequencer for the contract and any transaction manager for the signing key messagePayload := message.Payload - replyToDestination := message.ReplyTo switch message.MessageType { case "ResolveVerifierRequest": - go ir.handleResolveVerifierRequest(ctx, messagePayload, replyToDestination, &message.MessageID) + go ir.handleResolveVerifierRequest(ctx, messagePayload, message.FromNode, &message.MessageID) case "ResolveVerifierResponse": go ir.handleResolveVerifierReply(ctx, messagePayload, message.CorrelationID.String()) case "ResolveVerifierError": diff --git a/core/go/internal/privatetxnmgr/private_txn_mgr_test.go b/core/go/internal/privatetxnmgr/private_txn_mgr_test.go index 4c25628f0..29ea8f2f6 100644 --- a/core/go/internal/privatetxnmgr/private_txn_mgr_test.go +++ b/core/go/internal/privatetxnmgr/private_txn_mgr_test.go @@ -854,7 +854,7 @@ func TestPrivateTxManagerRemoteNotaryEndorser(t *testing.T) { localNodeMocks.transportManager.On("Send", mock.Anything, mock.Anything).Run(func(args mock.Arguments) { go func() { - assert.Equal(t, remoteNodeName, args.Get(1).(*components.TransportMessage).Node) + assert.Equal(t, remoteNodeName, args.Get(1).(*components.ReceivedMessage).Node) transportMessage := args.Get(1).(*components.TransportMessage) remoteEngine.HandlePaladinMsg(ctx, transportMessage) }() diff --git a/core/go/internal/transportmgr/reliable_msg_handler.go b/core/go/internal/transportmgr/reliable_msg_handler.go index 937a70c6a..b1f0f9975 100644 --- a/core/go/internal/transportmgr/reliable_msg_handler.go +++ b/core/go/internal/transportmgr/reliable_msg_handler.go @@ -39,9 +39,8 @@ const ( ) type reliableMsgOp struct { - msgID uuid.UUID - p *peer - msg *prototk.PaladinMsg + p *peer + msg *components.ReceivedMessage } func (op *reliableMsgOp) WriteKey() string { @@ -74,15 +73,15 @@ func (tm *transportManager) handleReliableMsgBatch(ctx context.Context, dbTX *go switch v.msg.MessageType { case RMHMessageTypeStateDistribution: - sd, stateToAdd, err := parseStateDistribution(ctx, v.msgID, v.msg.Payload) + sd, stateToAdd, err := parseStateDistribution(ctx, v.msg.MessageID, v.msg.Payload) if err != nil { acksToSend = append(acksToSend, - &ackInfo{node: v.p.Name, id: v.msgID, Error: err.Error()}, // reject the message permanently + &ackInfo{node: v.p.Name, id: v.msg.MessageID, Error: err.Error()}, // reject the message permanently ) } else { statesToAdd[sd.Domain] = append(statesToAdd[sd.Domain], &stateAndAck{ state: stateToAdd, - ack: &ackInfo{node: v.p.Name, id: v.msgID}, + ack: &ackInfo{node: v.p.Name, id: v.msg.MessageID}, }) } case RMHMessageTypePreparedTransaction: @@ -90,11 +89,11 @@ func (tm *transportManager) handleReliableMsgBatch(ctx context.Context, dbTX *go err := json.Unmarshal(v.msg.Payload, &pt) if err != nil { acksToSend = append(acksToSend, - &ackInfo{node: v.p.Name, id: v.msgID, Error: err.Error()}, // reject the message permanently + &ackInfo{node: v.p.Name, id: v.msg.MessageID, Error: err.Error()}, // reject the message permanently ) } else { // Build the ack now, as we'll fail the whole TX and not send any acks if the write fails - acksToSend = append(acksToSend, &ackInfo{node: v.p.Name, id: v.msgID}) + acksToSend = append(acksToSend, &ackInfo{node: v.p.Name, id: v.msg.MessageID}) preparedTxnToAdd = append(preparedTxnToAdd, &pt) } case RMHMessageTypeReceipt: @@ -102,11 +101,11 @@ func (tm *transportManager) handleReliableMsgBatch(ctx context.Context, dbTX *go err := json.Unmarshal(v.msg.Payload, &receipt) if err != nil { acksToSend = 
append(acksToSend, - &ackInfo{node: v.p.Name, id: v.msgID, Error: err.Error()}, // reject the message permanently + &ackInfo{node: v.p.Name, id: v.msg.MessageID, Error: err.Error()}, // reject the message permanently ) } else { // Build the ack now, as we'll fail the whole TX and not send any acks if the write fails - acksToSend = append(acksToSend, &ackInfo{node: v.p.Name, id: v.msgID}) + acksToSend = append(acksToSend, &ackInfo{node: v.p.Name, id: v.msg.MessageID}) txReceiptsToFinalize = append(txReceiptsToFinalize, &receipt) } case RMHMessageTypeAck, RMHMessageTypeNack: @@ -117,7 +116,7 @@ func (tm *transportManager) handleReliableMsgBatch(ctx context.Context, dbTX *go default: err := i18n.NewError(ctx, msgs.MsgTransportUnsupportedReliableMsgType, v.msg.MessageType) acksToSend = append(acksToSend, - &ackInfo{node: v.p.Name, id: v.msgID, Error: err.Error()}, // reject the message permanently + &ackInfo{node: v.p.Name, id: v.msg.MessageID, Error: err.Error()}, // reject the message permanently ) } } @@ -222,22 +221,18 @@ func buildAck(msgID uuid.UUID, errString string) *prototk.PaladinMsg { } } -func (tm *transportManager) parseReceivedAckNack(ctx context.Context, msg *prototk.PaladinMsg) *components.ReliableMessageAck { +func (tm *transportManager) parseReceivedAckNack(ctx context.Context, msg *components.ReceivedMessage) *components.ReliableMessageAck { var info ackInfo - var cid uuid.UUID err := json.Unmarshal(msg.Payload, &info) - if msg.CorrelationId == nil { + if msg.CorrelationID == nil { err = i18n.NewError(ctx, msgs.MsgTransportAckMissingCorrelationID) } - if err == nil { - cid, err = uuid.Parse(*msg.CorrelationId) - } if err != nil { log.L(ctx).Errorf("Received invalid ack/nack: %s", msg.Payload) return nil } ackNackToWrite := &components.ReliableMessageAck{ - MessageID: cid, + MessageID: *msg.CorrelationID, Time: tktypes.TimestampNow(), } if msg.MessageType == RMHMessageTypeNack { diff --git a/core/go/internal/transportmgr/reliable_msg_handler_test.go b/core/go/internal/transportmgr/reliable_msg_handler_test.go index c27644042..5b5dd0c9c 100644 --- a/core/go/internal/transportmgr/reliable_msg_handler_test.go +++ b/core/go/internal/transportmgr/reliable_msg_handler_test.go @@ -102,6 +102,15 @@ func TestReceiveMessageStateSendAckRealDB(t *testing.T) { } +func testReceivedReliableMsg(msgType string, payloadObj any) *components.ReceivedMessage { + return &components.ReceivedMessage{ + MessageID: uuid.New(), + CorrelationID: confutil.P(uuid.New()), + MessageType: msgType, + Payload: tktypes.JSONString(payloadObj), + } +} + func TestHandleStateDistroBadState(t *testing.T) { ctx, tm, tp, done := newTestTransport(t, false, mockGoodTransport, @@ -113,13 +122,9 @@ func TestHandleStateDistroBadState(t *testing.T) { ) defer done() - msgID := uuid.New() - msg := &prototk.PaladinMsg{ - MessageId: msgID.String(), - CorrelationId: confutil.P(uuid.NewString()), - Component: prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER, - MessageType: RMHMessageTypeStateDistribution, - Payload: tktypes.JSONString(&components.StateDistributionWithData{ + msg := testReceivedReliableMsg( + RMHMessageTypeStateDistribution, + &components.StateDistributionWithData{ StateDistribution: components.StateDistribution{ Domain: "domain1", ContractAddress: tktypes.RandAddress().String(), @@ -127,17 +132,16 @@ func TestHandleStateDistroBadState(t *testing.T) { StateID: tktypes.RandHex(32), }, StateData: []byte(`{"some":"data"}`), - }), - } + }) - ackNackCheck := setupAckOrNackCheck(t, tp, msgID, "bad data") + ackNackCheck := 
setupAckOrNackCheck(t, tp, msg.MessageID, "bad data") p, err := tm.getPeer(ctx, "node2", false) require.NoError(t, err) // Handle the batch - will fail to write the states postCommit, _, err := tm.handleReliableMsgBatch(ctx, tm.persistence.DB(), []*reliableMsgOp{ - {msgID: msgID, p: p, msg: msg}, + {p: p, msg: msg}, }) require.NoError(t, err) @@ -154,13 +158,9 @@ func TestHandleStateDistroBadMsg(t *testing.T) { ) defer done() - msgID := uuid.New() - msg := &prototk.PaladinMsg{ - MessageId: msgID.String(), - CorrelationId: confutil.P(uuid.NewString()), - Component: prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER, - MessageType: RMHMessageTypeStateDistribution, - Payload: tktypes.JSONString(&components.StateDistributionWithData{ + msg := testReceivedReliableMsg( + RMHMessageTypeStateDistribution, + &components.StateDistributionWithData{ StateDistribution: components.StateDistribution{ Domain: "domain1", ContractAddress: tktypes.RandAddress().String(), @@ -168,17 +168,16 @@ func TestHandleStateDistroBadMsg(t *testing.T) { StateID: tktypes.RandHex(32), }, StateData: []byte(`{"some":"data"}`), - }), - } + }) - ackNackCheck := setupAckOrNackCheck(t, tp, msgID, "PD012016") + ackNackCheck := setupAckOrNackCheck(t, tp, msg.MessageID, "PD012016") p, err := tm.getPeer(ctx, "node2", false) require.NoError(t, err) // Handle the batch - will fail to write the states postCommit, _, err := tm.handleReliableMsgBatch(ctx, tm.persistence.DB(), []*reliableMsgOp{ - {msgID: msgID, p: p, msg: msg}, + {p: p, msg: msg}, }) require.NoError(t, err) @@ -195,23 +194,16 @@ func TestHandleStateDistroUnknownMsgType(t *testing.T) { ) defer done() - msgID := uuid.New() - msg := &prototk.PaladinMsg{ - MessageId: msgID.String(), - CorrelationId: confutil.P(uuid.NewString()), - Component: prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER, - MessageType: "unknown", - Payload: []byte(`{}`), - } + msg := testReceivedReliableMsg("unknown", struct{}{}) - ackNackCheck := setupAckOrNackCheck(t, tp, msgID, "PD012017") + ackNackCheck := setupAckOrNackCheck(t, tp, msg.MessageID, "PD012017") p, err := tm.getPeer(ctx, "node2", false) require.NoError(t, err) // Handle the batch - will fail to write the states postCommit, _, err := tm.handleReliableMsgBatch(ctx, tm.persistence.DB(), []*reliableMsgOp{ - {msgID: msgID, p: p, msg: msg}, + {p: p, msg: msg}, }) require.NoError(t, err) @@ -227,21 +219,14 @@ func TestHandleAckFailReadMsg(t *testing.T) { }) defer done() - msgID := uuid.New() - msg := &prototk.PaladinMsg{ - MessageId: msgID.String(), - CorrelationId: confutil.P(uuid.NewString()), - Component: prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER, - MessageType: RMHMessageTypeAck, - Payload: []byte(`{}`), - } + msg := testReceivedReliableMsg(RMHMessageTypeAck, struct{}{}) p, err := tm.getPeer(ctx, "node2", false) require.NoError(t, err) // Handle the batch - will fail to write the states _, _, err = tm.handleReliableMsgBatch(ctx, tm.persistence.DB(), []*reliableMsgOp{ - {msgID: msgID, p: p, msg: msg}, + {p: p, msg: msg}, }) require.Regexp(t, "pop", err) @@ -256,44 +241,34 @@ func TestHandleNackFailWriteAck(t *testing.T) { }) defer done() - msg := &prototk.PaladinMsg{ - MessageId: uuid.NewString(), - CorrelationId: confutil.P(msgID.String()), - Component: prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER, - MessageType: RMHMessageTypeNack, - Payload: []byte(`{}`), - } + msg := testReceivedReliableMsg(RMHMessageTypeNack, struct{}{}) + msg.CorrelationID = &msgID p, err := tm.getPeer(ctx, "node2", false) require.NoError(t, err) // Handle the batch - will fail to 
write the states _, _, err = tm.handleReliableMsgBatch(ctx, tm.persistence.DB(), []*reliableMsgOp{ - {msgID: msgID, p: p, msg: msg}, + {p: p, msg: msg}, }) require.Regexp(t, "pop", err) } func TestHandleBadAckNoCorrelId(t *testing.T) { - msgID := uuid.New() ctx, tm, _, done := newTestTransport(t, false) defer done() - msg := &prototk.PaladinMsg{ - MessageId: uuid.NewString(), - Component: prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER, - MessageType: RMHMessageTypeAck, - Payload: []byte(`{}`), - } + msg := testReceivedReliableMsg(RMHMessageTypeAck, struct{}{}) + msg.CorrelationID = nil p, err := tm.getPeer(ctx, "node2", false) require.NoError(t, err) // Handle the batch - will fail to write the states postCommit, _, err := tm.handleReliableMsgBatch(ctx, tm.persistence.DB(), []*reliableMsgOp{ - {msgID: msgID, p: p, msg: msg}, + {p: p, msg: msg}, }) require.NoError(t, err) postCommit(nil) @@ -308,23 +283,19 @@ func TestHandleReceiptFail(t *testing.T) { ) defer done() - msgID := uuid.New() - msg := &prototk.PaladinMsg{ - MessageId: msgID.String(), - CorrelationId: confutil.P(uuid.NewString()), - Component: prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER, - MessageType: RMHMessageTypeReceipt, - Payload: tktypes.JSONString(&components.ReceiptInput{ + msg := testReceivedReliableMsg( + RMHMessageTypeReceipt, + &components.ReceiptInput{ Domain: "domain1", ReceiptType: components.RT_Success, - }), - } + }) + p, err := tm.getPeer(ctx, "node2", false) require.NoError(t, err) // Handle the batch - will fail to write the states _, _, err = tm.handleReliableMsgBatch(ctx, tm.persistence.DB(), []*reliableMsgOp{ - {msgID: msgID, p: p, msg: msg}, + {p: p, msg: msg}, }) require.Regexp(t, "pop", err) @@ -339,23 +310,19 @@ func TestHandlePreparedTxFail(t *testing.T) { ) defer done() - msgID := uuid.New() - msg := &prototk.PaladinMsg{ - MessageId: msgID.String(), - CorrelationId: confutil.P(uuid.NewString()), - Component: prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER, - MessageType: RMHMessageTypePreparedTransaction, - Payload: tktypes.JSONString(&components.PreparedTransactionWithRefs{ + msg := testReceivedReliableMsg( + RMHMessageTypePreparedTransaction, + &components.PreparedTransactionWithRefs{ PreparedTransactionBase: &pldapi.PreparedTransactionBase{ ID: uuid.New(), }, - }), - } + }) + p, err := tm.getPeer(ctx, "node2", false) require.NoError(t, err) _, _, err = tm.handleReliableMsgBatch(ctx, tm.persistence.DB(), []*reliableMsgOp{ - {msgID: msgID, p: p, msg: msg}, + {p: p, msg: msg}, }) require.Regexp(t, "pop", err) @@ -368,22 +335,17 @@ func TestHandleReceiptBadData(t *testing.T) { ) defer done() - msgID := uuid.New() - msg := &prototk.PaladinMsg{ - MessageId: msgID.String(), - CorrelationId: confutil.P(uuid.NewString()), - Component: prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER, - MessageType: RMHMessageTypeReceipt, - Payload: []byte(`!{ bad data`), - } + msg := testReceivedReliableMsg(RMHMessageTypeReceipt, nil) + msg.Payload = []byte(`!{ bad data`) + p, err := tm.getPeer(ctx, "node2", false) require.NoError(t, err) - ackNackCheck := setupAckOrNackCheck(t, tp, msgID, "invalid character") + ackNackCheck := setupAckOrNackCheck(t, tp, msg.MessageID, "invalid character") // Handle the batch - will fail to write the states pc, _, err := tm.handleReliableMsgBatch(ctx, tm.persistence.DB(), []*reliableMsgOp{ - {msgID: msgID, p: p, msg: msg}, + {p: p, msg: msg}, }) require.NoError(t, err) @@ -399,21 +361,16 @@ func TestHandlePreparedTxBadData(t *testing.T) { ) defer done() - msgID := uuid.New() - msg := 
&prototk.PaladinMsg{ - MessageId: msgID.String(), - CorrelationId: confutil.P(uuid.NewString()), - Component: prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER, - MessageType: RMHMessageTypePreparedTransaction, - Payload: []byte(`!{ bad data`), - } + msg := testReceivedReliableMsg(RMHMessageTypePreparedTransaction, nil) + msg.Payload = []byte(`!{ bad data`) + p, err := tm.getPeer(ctx, "node2", false) require.NoError(t, err) - ackNackCheck := setupAckOrNackCheck(t, tp, msgID, "invalid character") + ackNackCheck := setupAckOrNackCheck(t, tp, msg.MessageID, "invalid character") pc, _, err := tm.handleReliableMsgBatch(ctx, tm.persistence.DB(), []*reliableMsgOp{ - {msgID: msgID, p: p, msg: msg}, + {p: p, msg: msg}, }) require.NoError(t, err) @@ -436,24 +393,20 @@ func TestHandlePreparedOk(t *testing.T) { ) defer done() - msgID := uuid.New() - msg := &prototk.PaladinMsg{ - MessageId: msgID.String(), - CorrelationId: confutil.P(uuid.NewString()), - Component: prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER, - MessageType: RMHMessageTypePreparedTransaction, - Payload: tktypes.JSONString(&components.PreparedTransactionWithRefs{ + msg := testReceivedReliableMsg( + RMHMessageTypePreparedTransaction, + &components.PreparedTransactionWithRefs{ PreparedTransactionBase: &pldapi.PreparedTransactionBase{ ID: uuid.New(), }, - }), - } + }) + p, err := tm.getPeer(ctx, "node2", false) require.NoError(t, err) - ackNackCheck := setupAckOrNackCheck(t, tp, msgID, "") + ackNackCheck := setupAckOrNackCheck(t, tp, msg.MessageID, "") pc, _, err := tm.handleReliableMsgBatch(ctx, tm.persistence.DB(), []*reliableMsgOp{ - {msgID: msgID, p: p, msg: msg}, + {p: p, msg: msg}, }) require.NoError(t, err) diff --git a/core/go/internal/transportmgr/transport.go b/core/go/internal/transportmgr/transport.go index 2e031a389..23fdd1d9b 100644 --- a/core/go/internal/transportmgr/transport.go +++ b/core/go/internal/transportmgr/transport.go @@ -143,12 +143,14 @@ func (t *transport) ReceiveMessage(ctx context.Context, req *prototk.ReceiveMess return nil, i18n.NewError(ctx, msgs.MsgTransportInvalidMessage) } + var correlationID *uuid.UUID if msg.CorrelationId != nil { - _, err := uuid.Parse(*msg.CorrelationId) + parsedUUID, err := uuid.Parse(*msg.CorrelationId) if err != nil { log.L(ctx).Errorf("Invalid correlationId from transport: %s", protoToJSON(msg)) return nil, i18n.NewError(ctx, msgs.MsgTransportInvalidMessage) } + correlationID = &parsedUUID } p, err := t.tm.getPeer(ctx, req.FromNode, false /* we do not require a connection for sending here */) @@ -162,29 +164,34 @@ func (t *transport) ReceiveMessage(ctx context.Context, req *prototk.ReceiveMess log.L(ctx).Tracef("transport %s message received: %s", t.name, protoToJSON(msg)) } - if err := t.deliverMessage(ctx, p, msgID, msg); err != nil { + if err := t.deliverMessage(ctx, p, msg.Component, &components.ReceivedMessage{ + FromNode: req.FromNode, + MessageID: msgID, + CorrelationID: correlationID, + MessageType: msg.MessageType, + Payload: msg.Payload, + }); err != nil { return nil, err } return &prototk.ReceiveMessageResponse{}, nil } -func (t *transport) deliverMessage(ctx context.Context, p *peer, msgID uuid.UUID, msg *prototk.PaladinMsg) error { +func (t *transport) deliverMessage(ctx context.Context, p *peer, component prototk.PaladinMsg_Component, msg *components.ReceivedMessage) error { - switch msg.Component { + switch component { case prototk.PaladinMsg_RELIABLE_MESSAGE_HANDLER: _ = t.tm.reliableMsgWriter.Queue(ctx, &reliableMsgOp{ - p: p, - msgID: msgID, - msg: msg, + p: p, + msg: 
msg, }) case prototk.PaladinMsg_TRANSACTION_ENGINE: t.tm.privateTxManager.HandlePaladinMsg(ctx, msg) case prototk.PaladinMsg_IDENTITY_RESOLVER: t.tm.identityResolver.HandlePaladinMsg(ctx, msg) default: - log.L(ctx).Errorf("Component not found for message '%s': %s", msgID, msg.Component) - return i18n.NewError(ctx, msgs.MsgTransportComponentNotFound, msg.Component.String()) + log.L(ctx).Errorf("Component not found for message '%s': %s", msg.MessageID, component) + return i18n.NewError(ctx, msgs.MsgTransportComponentNotFound, component.String()) } return nil diff --git a/core/go/internal/transportmgr/transport_test.go b/core/go/internal/transportmgr/transport_test.go index d3dc86938..9afd062e3 100644 --- a/core/go/internal/transportmgr/transport_test.go +++ b/core/go/internal/transportmgr/transport_test.go @@ -316,11 +316,11 @@ func TestSendInvalidMessageNoPayload(t *testing.T) { } func TestReceiveMessageTransactionEngine(t *testing.T) { - receivedMessages := make(chan *prototk.PaladinMsg, 1) + receivedMessages := make(chan *components.ReceivedMessage, 1) ctx, _, tp, done := newTestTransport(t, false, func(mc *mockComponents, conf *pldconf.TransportManagerConfig) { mc.privateTxManager.On("HandlePaladinMsg", mock.Anything, mock.Anything).Return().Run(func(args mock.Arguments) { - receivedMessages <- args[1].(*prototk.PaladinMsg) + receivedMessages <- args[1].(*components.ReceivedMessage) }) }) defer done() @@ -344,11 +344,11 @@ func TestReceiveMessageTransactionEngine(t *testing.T) { } func TestReceiveMessageIdentityResolver(t *testing.T) { - receivedMessages := make(chan *prototk.PaladinMsg, 1) + receivedMessages := make(chan *components.ReceivedMessage, 1) ctx, _, tp, done := newTestTransport(t, false, func(mc *mockComponents, conf *pldconf.TransportManagerConfig) { mc.identityResolver.On("HandlePaladinMsg", mock.Anything, mock.Anything).Return().Run(func(args mock.Arguments) { - receivedMessages <- args[1].(*prototk.PaladinMsg) + receivedMessages <- args[1].(*components.ReceivedMessage) }) }) defer done() From 4c93d19ae66548a41ee48740241f72205af905f1 Mon Sep 17 00:00:00 2001 From: Peter Broadhurst Date: Sat, 4 Jan 2025 12:21:31 -0500 Subject: [PATCH 26/41] Add activate/deactivate node functions Signed-off-by: Peter Broadhurst --- core/go/internal/plugins/transports.go | 30 +++++++++++++++++++++ core/go/internal/plugins/transports_test.go | 23 +++++++++++++--- 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/core/go/internal/plugins/transports.go b/core/go/internal/plugins/transports.go index b4eee0728..08702c50f 100644 --- a/core/go/internal/plugins/transports.go +++ b/core/go/internal/plugins/transports.go @@ -129,3 +129,33 @@ func (br *TransportBridge) GetLocalDetails(ctx context.Context, req *prototk.Get ) return } + +func (br *TransportBridge) ActivateNode(ctx context.Context, req *prototk.ActivateNodeRequest) (res *prototk.ActivateNodeResponse, err error) { + err = br.toPlugin.RequestReply(ctx, + func(dm plugintk.PluginMessage[prototk.TransportMessage]) { + dm.Message().RequestToTransport = &prototk.TransportMessage_ActivateNode{ActivateNode: req} + }, + func(dm plugintk.PluginMessage[prototk.TransportMessage]) bool { + if r, ok := dm.Message().ResponseFromTransport.(*prototk.TransportMessage_ActivateNodeRes); ok { + res = r.ActivateNodeRes + } + return res != nil + }, + ) + return +} + +func (br *TransportBridge) DeactivateNode(ctx context.Context, req *prototk.DeactivateNodeRequest) (res *prototk.DeactivateNodeResponse, err error) { + err = 
br.toPlugin.RequestReply(ctx, + func(dm plugintk.PluginMessage[prototk.TransportMessage]) { + dm.Message().RequestToTransport = &prototk.TransportMessage_DeactivateNode{DeactivateNode: req} + }, + func(dm plugintk.PluginMessage[prototk.TransportMessage]) bool { + if r, ok := dm.Message().ResponseFromTransport.(*prototk.TransportMessage_DeactivateNodeRes); ok { + res = r.DeactivateNodeRes + } + return res != nil + }, + ) + return +} diff --git a/core/go/internal/plugins/transports_test.go b/core/go/internal/plugins/transports_test.go index f2c6c1008..555d3560c 100644 --- a/core/go/internal/plugins/transports_test.go +++ b/core/go/internal/plugins/transports_test.go @@ -122,12 +122,20 @@ func TestTransportRequestsOK(t *testing.T) { return &prototk.ConfigureTransportResponse{}, nil }, SendMessage: func(ctx context.Context, smr *prototk.SendMessageRequest) (*prototk.SendMessageResponse, error) { - assert.Equal(t, "node1", smr.Message.Node) + assert.Equal(t, "type1", smr.Message.MessageType) return &prototk.SendMessageResponse{}, nil }, GetLocalDetails: func(ctx context.Context, gldr *prototk.GetLocalDetailsRequest) (*prototk.GetLocalDetailsResponse, error) { return &prototk.GetLocalDetailsResponse{TransportDetails: "endpoint stuff"}, nil }, + ActivateNode: func(ctx context.Context, anr *prototk.ActivateNodeRequest) (*prototk.ActivateNodeResponse, error) { + assert.Equal(t, "node1", anr.NodeName) + return &prototk.ActivateNodeResponse{PeerInfoJson: `{"endpoint": "stuff"}`}, nil + }, + DeactivateNode: func(ctx context.Context, danr *prototk.DeactivateNodeRequest) (*prototk.DeactivateNodeResponse, error) { + assert.Equal(t, "node1", danr.NodeName) + return &prototk.DeactivateNodeResponse{}, nil + }, } ttm := &testTransportManager{ @@ -166,7 +174,7 @@ func TestTransportRequestsOK(t *testing.T) { require.NoError(t, err) smr, err := transportAPI.SendMessage(ctx, &prototk.SendMessageRequest{ - Message: &prototk.Message{Node: "node1"}, + Message: &prototk.PaladinMsg{MessageType: "type1"}, }) require.NoError(t, err) assert.NotNil(t, smr) @@ -176,6 +184,15 @@ func TestTransportRequestsOK(t *testing.T) { assert.NotNil(t, smr) assert.Equal(t, "endpoint stuff", gldr.TransportDetails) + anr, err := transportAPI.ActivateNode(ctx, &prototk.ActivateNodeRequest{NodeName: "node1"}) + require.NoError(t, err) + assert.NotNil(t, anr) + assert.Equal(t, `{"endpoint": "stuff"}`, anr.PeerInfoJson) + + danr, err := transportAPI.DeactivateNode(ctx, &prototk.DeactivateNodeRequest{NodeName: "node1"}) + require.NoError(t, err) + assert.NotNil(t, danr) + // This is the point the transport manager would call us to say the transport is initialized // (once it's happy it's updated its internal state) transportAPI.Initialized() @@ -188,7 +205,7 @@ func TestTransportRequestsOK(t *testing.T) { require.NoError(t, err) assert.Equal(t, "node1_details", rts.TransportDetails) rms, err := callbacks.ReceiveMessage(ctx, &prototk.ReceiveMessageRequest{ - Message: &prototk.Message{ + Message: &prototk.PaladinMsg{ Payload: []byte("body1"), }, }) From 3962e634a1e9418c095bba10100f5c66fba04045 Mon Sep 17 00:00:00 2001 From: Peter Broadhurst Date: Sun, 5 Jan 2025 15:44:00 -0500 Subject: [PATCH 27/41] Update types in private TX manager and remove redundant logic/interfaces Signed-off-by: Peter Broadhurst --- core/go/internal/components/transportmgr.go | 3 +- .../privatetxnmgr/assemble_coordinator.go | 3 +- .../internal/privatetxnmgr/private_txn_mgr.go | 44 ++------- .../privatetxnmgr/private_txn_mgr_test.go | 66 ++++++++------ 
.../privatetxnmgr/ptmgrtypes/types.go | 2 +- core/go/internal/privatetxnmgr/sequencer.go | 47 ++++------ .../privatetxnmgr/sequencer_dispatch.go | 25 +++--- .../internal/privatetxnmgr/sequencer_test.go | 4 +- .../state_distribution_builder.go | 20 ++--- .../state_distribution_builder_test.go | 15 ++-- .../privatetxnmgr/syncpoints/dispatch.go | 89 +++++++------------ .../privatetxnmgr/syncpoints/syncpoints.go | 18 ++-- .../syncpoints/syncpoints_test.go | 13 +-- .../transaction_flow_mutators.go | 10 +-- .../privatetxnmgr/transaction_flow_test.go | 2 +- .../privatetxnmgr/transport_receiver.go | 14 ++- .../privatetxnmgr/transport_writer.go | 20 ++--- core/go/internal/transportmgr/manager.go | 31 +++++-- core/go/internal/transportmgr/manager_test.go | 11 +++ core/go/pkg/proto/engine.proto | 14 --- 20 files changed, 197 insertions(+), 254 deletions(-) diff --git a/core/go/internal/components/transportmgr.go b/core/go/internal/components/transportmgr.go index d0e93d24d..e2aa7058f 100644 --- a/core/go/internal/components/transportmgr.go +++ b/core/go/internal/components/transportmgr.go @@ -59,6 +59,7 @@ func (t ReliableMessageType) Options() []string { return []string{ string(RMTState), string(RMTReceipt), + string(RMTPreparedTransaction), } } @@ -151,5 +152,5 @@ type TransportManager interface { // including over node restart, until an ack is returned from the remote node. // // The pre-commit handler must be called after the DB transaction commits to trigger the delivery. - SendReliable(ctx context.Context, dbTX *gorm.DB, msg *ReliableMessage) (preCommit func(), err error) + SendReliable(ctx context.Context, dbTX *gorm.DB, msg ...*ReliableMessage) (preCommit func(), err error) } diff --git a/core/go/internal/privatetxnmgr/assemble_coordinator.go b/core/go/internal/privatetxnmgr/assemble_coordinator.go index 4ec411e7a..549b8d81a 100644 --- a/core/go/internal/privatetxnmgr/assemble_coordinator.go +++ b/core/go/internal/privatetxnmgr/assemble_coordinator.go @@ -67,10 +67,9 @@ func NewAssembleCoordinator(ctx context.Context, nodeName string, maxPendingRequ } } -func (ac *assembleCoordinator) Complete(requestID string, stateDistributions []*components.StateDistributionWithData) { +func (ac *assembleCoordinator) Complete(requestID string) { log.L(ac.ctx).Debugf("AssembleCoordinator:Commit %s", requestID) - ac.stateDistributer.DistributeStates(ac.ctx, stateDistributions) ac.commit <- requestID } diff --git a/core/go/internal/privatetxnmgr/private_txn_mgr.go b/core/go/internal/privatetxnmgr/private_txn_mgr.go index 0f59f0c60..54c4d8917 100644 --- a/core/go/internal/privatetxnmgr/private_txn_mgr.go +++ b/core/go/internal/privatetxnmgr/private_txn_mgr.go @@ -76,7 +76,7 @@ func (p *privateTxManager) PreInit(c components.PreInitComponents) (*components. 
func (p *privateTxManager) PostInit(c components.AllComponents) error { p.components = c p.nodeName = p.components.TransportManager().LocalNodeName() - p.syncPoints = syncpoints.NewSyncPoints(p.ctx, &p.config.Writer, c.Persistence(), c.TxManager(), c.PublicTxManager()) + p.syncPoints = syncpoints.NewSyncPoints(p.ctx, &p.config.Writer, c.Persistence(), c.TxManager(), c.PublicTxManager(), c.TransportManager()) return nil } @@ -155,8 +155,6 @@ func (p *privateTxManager) getSequencerForContract(ctx context.Context, dbTX *go publisher, p.syncPoints, p.components.IdentityResolver(), - p.stateDistributer, - p.preparedTransactionDistributer, transportWriter, confutil.DurationMin(p.config.RequestTimeout, 0, *pldconf.PrivateTxManagerDefaults.RequestTimeout), p.blockHeight, @@ -659,12 +657,11 @@ func (p *privateTxManager) handleEndorsementRequest(ctx context.Context, message return } - err = p.components.TransportManager().Send(ctx, &components.TransportMessage{ + err = p.components.TransportManager().Send(ctx, &components.FireAndForgetMessageSend{ MessageType: "EndorsementResponse", - ReplyTo: p.nodeName, Payload: endorsementResponseBytes, Node: replyTo, - Component: components.PRIVATE_TX_MANAGER_DESTINATION, + Component: prototk.PaladinMsg_TRANSACTION_ENGINE, }) if err != nil { log.L(ctx).Errorf("Failed to send endorsement response: %s", err) @@ -775,12 +772,11 @@ func (p *privateTxManager) sendAssembleError(ctx context.Context, node string, a log.L(ctx).Infof("Sending Assemble Error: ContractAddress: %s, TransactionId: %s, AssembleRequestId %s, Error: %s", contractAddress, transactionID, assembleRequestId, assembleError.ErrorMessage) - err = p.components.TransportManager().Send(ctx, &components.TransportMessage{ + err = p.components.TransportManager().Send(ctx, &components.FireAndForgetMessageSend{ MessageType: "AssembleError", - ReplyTo: p.nodeName, Payload: assembleErrorBytes, Node: node, - Component: components.PRIVATE_TX_MANAGER_DESTINATION, + Component: prototk.PaladinMsg_TRANSACTION_ENGINE, }) if err != nil { log.L(ctx).Errorf("Failed to send assemble error: %s", err) @@ -859,12 +855,11 @@ func (p *privateTxManager) handleAssembleRequest(ctx context.Context, messagePay return } - err = p.components.TransportManager().Send(ctx, &components.TransportMessage{ + err = p.components.TransportManager().Send(ctx, &components.FireAndForgetMessageSend{ MessageType: "AssembleResponse", - ReplyTo: p.nodeName, Payload: assembleResponseBytes, Node: replyTo, - Component: components.PRIVATE_TX_MANAGER_DESTINATION, + Component: prototk.PaladinMsg_TRANSACTION_ENGINE, }) if err != nil { log.L(ctx).Errorf("Failed to send assemble response: %s", err) @@ -995,31 +990,6 @@ func (p *privateTxManager) PrivateTransactionConfirmed(ctx context.Context, rece } } -func (p *privateTxManager) handleStateProducedEvent(ctx context.Context, messagePayload []byte, replyToDestination string) { - - //in the meantime, we share with the sequencer in memory in case that state is needed to assemble in flight transactions - stateProducedEvent := &pbEngine.StateProducedEvent{} - err := proto.Unmarshal(messagePayload, stateProducedEvent) - if err != nil { - log.L(ctx).Errorf("Failed to unmarshal StateProducedEvent: %s", err) - return - } - - //State distributer deals with the reliable delivery e.g. 
sending acks etc - go p.stateDistributer.HandleStateProducedEvent(ctx, stateProducedEvent, replyToDestination) - - contractAddressString := stateProducedEvent.ContractAddress - contractAddress := tktypes.MustEthAddress(contractAddressString) - - sequencer, err := p.getSequencerForContract(ctx, p.components.Persistence().DB(), *contractAddress, nil) - if err != nil { - log.L(ctx).Errorf("Failed to get sequencer for contract address %s: %s", contractAddress, err) - return - } - sequencer.HandleStateProducedEvent(ctx, stateProducedEvent) - -} - func (p *privateTxManager) CallPrivateSmartContract(ctx context.Context, call *components.ResolvedTransaction) (*abi.ComponentValue, error) { callTx := call.Transaction diff --git a/core/go/internal/privatetxnmgr/private_txn_mgr_test.go b/core/go/internal/privatetxnmgr/private_txn_mgr_test.go index 29ea8f2f6..b7b3a8eb4 100644 --- a/core/go/internal/privatetxnmgr/private_txn_mgr_test.go +++ b/core/go/internal/privatetxnmgr/private_txn_mgr_test.go @@ -85,6 +85,16 @@ func mockWritePublicTxsOk(mocks *dependencyMocks) chan struct{} { return dispatched } +func inMsgToOut(fromNode string, send *components.FireAndForgetMessageSend) *components.ReceivedMessage { + return &components.ReceivedMessage{ + FromNode: fromNode, + MessageID: uuid.New(), + CorrelationID: send.CorrelationID, + MessageType: send.MessageType, + Payload: send.Payload, + } +} + func TestPrivateTxManagerInit(t *testing.T) { privateTxManager, mocks := NewPrivateTransactionMgrForPackageTesting(t, "node1") @@ -854,16 +864,16 @@ func TestPrivateTxManagerRemoteNotaryEndorser(t *testing.T) { localNodeMocks.transportManager.On("Send", mock.Anything, mock.Anything).Run(func(args mock.Arguments) { go func() { - assert.Equal(t, remoteNodeName, args.Get(1).(*components.ReceivedMessage).Node) - transportMessage := args.Get(1).(*components.TransportMessage) - remoteEngine.HandlePaladinMsg(ctx, transportMessage) + assert.Equal(t, remoteNodeName, args.Get(1).(*components.ReceivedMessage).FromNode) + send := args.Get(1).(*components.FireAndForgetMessageSend) + remoteEngine.HandlePaladinMsg(ctx, inMsgToOut(localNodeName, send)) }() }).Return(nil).Maybe() remoteEngineMocks.transportManager.On("Send", mock.Anything, mock.Anything).Run(func(args mock.Arguments) { go func() { - transportMessage := args.Get(1).(*components.TransportMessage) - privateTxManager.HandlePaladinMsg(ctx, transportMessage) + send := args.Get(1).(*components.FireAndForgetMessageSend) + privateTxManager.HandlePaladinMsg(ctx, inMsgToOut(remoteNodeName, send)) }() }).Return(nil).Maybe() @@ -1042,21 +1052,21 @@ func TestPrivateTxManagerRemoteNotaryEndorserRetry(t *testing.T) { localNodeMocks.transportManager.On("Send", mock.Anything, mock.Anything).Run(func(args mock.Arguments) { go func() { - assert.Equal(t, remoteNodeName, args.Get(1).(*components.TransportMessage).Node) - transportMessage := args.Get(1).(*components.TransportMessage) - if transportMessage.MessageType == "DelegationRequest" && !ignoredDelegateRequest { + assert.Equal(t, remoteNodeName, args.Get(1).(*components.FireAndForgetMessageSend).Node) + send := args.Get(1).(*components.FireAndForgetMessageSend) + if send.MessageType == "DelegationRequest" && !ignoredDelegateRequest { //ignore the first delegate request and force a retry ignoredDelegateRequest = true } else { - remoteEngine.HandlePaladinMsg(ctx, transportMessage) + remoteEngine.HandlePaladinMsg(ctx, inMsgToOut(localNodeName, send)) } }() }).Return(nil).Maybe() remoteEngineMocks.transportManager.On("Send", 
mock.Anything, mock.Anything).Run(func(args mock.Arguments) { go func() { - transportMessage := args.Get(1).(*components.TransportMessage) - privateTxManager.HandlePaladinMsg(ctx, transportMessage) + send := args.Get(1).(*components.FireAndForgetMessageSend) + privateTxManager.HandlePaladinMsg(ctx, inMsgToOut(remoteNodeName, send)) }() }).Return(nil).Maybe() @@ -1868,9 +1878,9 @@ func TestPrivateTxManagerDependantTransactionEndorsedOutOfOrder(t *testing.T) { sentEndorsementRequest := make(chan string, 1) aliceEngineMocks.transportManager.On("Send", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Run(func(args mock.Arguments) { - message := args.Get(1).(*components.TransportMessage) + send := args.Get(1).(*components.FireAndForgetMessageSend) endorsementRequest := &pbEngine.EndorsementRequest{} - err := proto.Unmarshal(message.Payload, endorsementRequest) + err := proto.Unmarshal(send.Payload, endorsementRequest) if err != nil { log.L(ctx).Errorf("Failed to unmarshal endorsement request: %s", err) return @@ -2004,7 +2014,8 @@ func TestPrivateTxManagerDependantTransactionEndorsedOutOfOrder(t *testing.T) { require.NoError(t, err) //now send the endorsements back - aliceEngine.HandlePaladinMsg(ctx, &components.TransportMessage{ + aliceEngine.HandlePaladinMsg(ctx, &components.ReceivedMessage{ + FromNode: bobNodeName, MessageType: "EndorsementResponse", Payload: endorsementResponse2bytes, }) @@ -2034,7 +2045,8 @@ func TestPrivateTxManagerDependantTransactionEndorsedOutOfOrder(t *testing.T) { require.NoError(t, err) //now send the final endorsement back - aliceEngine.HandlePaladinMsg(ctx, &components.TransportMessage{ + aliceEngine.HandlePaladinMsg(ctx, &components.ReceivedMessage{ + FromNode: bobNodeName, MessageType: "EndorsementResponse", Payload: endorsementResponse1Bytes, }) @@ -2678,20 +2690,22 @@ func (m *dependencyMocks) mockForSubmitter(t *testing.T, transactionID *uuid.UUI func mockNetwork(t *testing.T, transactionManagers []privateTransactionMgrForPackageTesting) { - routeToNode := func(args mock.Arguments) { - go func() { - transportMessage := args.Get(1).(*components.TransportMessage) - for _, tm := range transactionManagers { - if tm.NodeName() == transportMessage.Node { - tm.HandlePaladinMsg(context.Background(), transportMessage) - return + routeToNode := func(fromNode string) func(args mock.Arguments) { + return func(args mock.Arguments) { + go func() { + send := args.Get(1).(*components.FireAndForgetMessageSend) + for _, tm := range transactionManagers { + if tm.NodeName() == send.Node { + tm.HandlePaladinMsg(context.Background(), inMsgToOut(fromNode, send)) + return + } } - } - assert.Failf(t, "no transaction manager found for node %s", transportMessage.Node) - }() + assert.Failf(t, "no transaction manager found for node %s", send.Node) + }() + } } for _, tm := range transactionManagers { - tm.DependencyMocks().transportManager.On("Send", mock.Anything, mock.Anything).Run(routeToNode).Return(nil).Maybe() + tm.DependencyMocks().transportManager.On("Send", mock.Anything, mock.Anything).Run(routeToNode(tm.NodeName())).Return(nil).Maybe() } } diff --git a/core/go/internal/privatetxnmgr/ptmgrtypes/types.go b/core/go/internal/privatetxnmgr/ptmgrtypes/types.go index b2f9c32c9..5778b2635 100644 --- a/core/go/internal/privatetxnmgr/ptmgrtypes/types.go +++ b/core/go/internal/privatetxnmgr/ptmgrtypes/types.go @@ -162,7 +162,7 @@ type AssembleCoordinator interface { Start() Stop() QueueAssemble(ctx context.Context, assemblingNode string, transactionID uuid.UUID, 
transactionPreAssembly *components.TransactionPreAssembly) - Complete(requestID string, stateDistributions []*components.StateDistributionWithData) + Complete(requestID string) } type LocalAssembler interface { diff --git a/core/go/internal/privatetxnmgr/sequencer.go b/core/go/internal/privatetxnmgr/sequencer.go index 49f22a4d5..19e2acfcb 100644 --- a/core/go/internal/privatetxnmgr/sequencer.go +++ b/core/go/internal/privatetxnmgr/sequencer.go @@ -29,7 +29,6 @@ import ( "github.com/kaleido-io/paladin/core/internal/msgs" "github.com/kaleido-io/paladin/core/internal/privatetxnmgr/ptmgrtypes" "github.com/kaleido-io/paladin/core/internal/privatetxnmgr/syncpoints" - pbEngine "github.com/kaleido-io/paladin/core/pkg/proto/engine" "github.com/kaleido-io/paladin/toolkit/pkg/log" "github.com/kaleido-io/paladin/toolkit/pkg/tktypes" @@ -158,22 +157,21 @@ func NewSequencer( incompleteTxSProcessMap: make(map[string]ptmgrtypes.TransactionFlow), persistenceRetryTimeout: confutil.DurationMin(sequencerConfig.PersistenceRetryTimeout, 1*time.Millisecond, *pldconf.PrivateTxManagerDefaults.Sequencer.PersistenceRetryTimeout), - staleTimeout: confutil.DurationMin(sequencerConfig.StaleTimeout, 1*time.Millisecond, *pldconf.PrivateTxManagerDefaults.Sequencer.StaleTimeout), - processedTxIDs: make(map[string]bool), - orchestrationEvalRequestChan: make(chan bool, 1), - stopProcess: make(chan bool, 1), - pendingTransactionEvents: make(chan ptmgrtypes.PrivateTransactionEvent, *pldconf.PrivateTxManagerDefaults.Sequencer.MaxPendingEvents), - nodeName: nodeName, - domainAPI: domainAPI, - components: allComponents, - endorsementGatherer: endorsementGatherer, - publisher: publisher, - syncPoints: syncPoints, - identityResolver: identityResolver, - preparedTransactionDistributer: preparedTransactionDistributer, - transportWriter: transportWriter, - graph: NewGraph(), - requestTimeout: requestTimeout, + staleTimeout: confutil.DurationMin(sequencerConfig.StaleTimeout, 1*time.Millisecond, *pldconf.PrivateTxManagerDefaults.Sequencer.StaleTimeout), + processedTxIDs: make(map[string]bool), + orchestrationEvalRequestChan: make(chan bool, 1), + stopProcess: make(chan bool, 1), + pendingTransactionEvents: make(chan ptmgrtypes.PrivateTransactionEvent, *pldconf.PrivateTxManagerDefaults.Sequencer.MaxPendingEvents), + nodeName: nodeName, + domainAPI: domainAPI, + components: allComponents, + endorsementGatherer: endorsementGatherer, + publisher: publisher, + syncPoints: syncPoints, + identityResolver: identityResolver, + transportWriter: transportWriter, + graph: NewGraph(), + requestTimeout: requestTimeout, environment: &sequencerEnvironment{ blockHeight: blockHeight, }, @@ -371,18 +369,3 @@ func (s *Sequencer) GetTxStatus(ctx context.Context, txID uuid.UUID) (components FailureMessage: failureMessage, }, nil } - -func (s *Sequencer) HandleStateProducedEvent(ctx context.Context, stateProducedEvent *pbEngine.StateProducedEvent) { - readTX := s.components.Persistence().DB() // no DB transaction required here for the reads from the DB - log.L(ctx).Debug("Sequencer:HandleStateProducedEvent Upserting state to delegateDomainContext") - - states, err := s.delegateDomainContext.UpsertStates(readTX, &components.StateUpsert{ - SchemaID: tktypes.MustParseBytes32(stateProducedEvent.SchemaId), - Data: tktypes.RawJSON(stateProducedEvent.StateDataJson), - }) - if err != nil { - log.L(ctx).Errorf("Error upserting states: %s", err) - return - } - log.L(ctx).Debugf("Upserted states: %v", states) -} diff --git 
a/core/go/internal/privatetxnmgr/sequencer_dispatch.go b/core/go/internal/privatetxnmgr/sequencer_dispatch.go index cdd0f993e..82e35972b 100644 --- a/core/go/internal/privatetxnmgr/sequencer_dispatch.go +++ b/core/go/internal/privatetxnmgr/sequencer_dispatch.go @@ -42,7 +42,7 @@ func (s *Sequencer) DispatchTransactions(ctx context.Context, dispatchableTransa PublicDispatches: make([]*syncpoints.PublicDispatch, 0, len(dispatchableTransactions)), } - stateDistributions := make([]*components.StateDistributionWithData, 0) + stateDistributions := make([]*components.StateDistribution, 0) localStateDistributions := make([]*components.StateDistributionWithData, 0) preparedTxnDistributions := make([]*components.PreparedTransactionWithRefs, 0) @@ -100,7 +100,9 @@ func (s *Sequencer) DispatchTransactions(ctx context.Context, dispatchableTransa if err != nil { return err } - stateDistributions = append(stateDistributions, sds.Remote...) + for _, sd := range sds.Remote { + stateDistributions = append(stateDistributions, &sd.StateDistribution) + } localStateDistributions = append(localStateDistributions, sds.Local...) } @@ -184,10 +186,6 @@ func (s *Sequencer) DispatchTransactions(ctx context.Context, dispatchableTransa for _, preparedTransaction := range dispatchBatch.PreparedTransactions { s.publisher.PublishTransactionPreparedEvent(ctx, preparedTransaction.ID.String()) } - //now that the DB write has been persisted, we can trigger the in-memory distribution of the prepared transactions and states - s.todo.DistributeStates(ctx, stateDistributions) - - s.preparedTransactionDistributer.DistributePreparedTransactions(ctx, preparedTxnDistributions) // We also need to trigger ourselves for any private TX we chained for _, tx := range dispatchBatch.PrivateDispatches { @@ -202,11 +200,12 @@ func (s *Sequencer) DispatchTransactions(ctx context.Context, dispatchableTransa func mapPreparedTransaction(tx *components.PrivateTransaction) *components.PreparedTransactionWithRefs { pt := &components.PreparedTransactionWithRefs{ - ID: tx.ID, - Domain: tx.Domain, - To: &tx.Address, - Metadata: tx.PreparedMetadata, - Sender: tx.PreAssembly.TransactionSpecification.From, + PreparedTransactionBase: &pldapi.PreparedTransactionBase{ + ID: tx.ID, + Domain: tx.Domain, + To: &tx.Address, + Metadata: tx.PreparedMetadata, + }, } for _, s := range tx.PostAssembly.InputStates { pt.StateRefs.Spent = append(pt.StateRefs.Spent, s.ID) @@ -221,9 +220,9 @@ func mapPreparedTransaction(tx *components.PrivateTransaction) *components.Prepa pt.StateRefs.Info = append(pt.StateRefs.Info, s.ID) } if tx.PreparedPublicTransaction != nil { - pt.Transaction = tx.PreparedPublicTransaction + pt.Transaction = *tx.PreparedPublicTransaction } else { - pt.Transaction = tx.PreparedPrivateTransaction + pt.Transaction = *tx.PreparedPrivateTransaction } return pt diff --git a/core/go/internal/privatetxnmgr/sequencer_test.go b/core/go/internal/privatetxnmgr/sequencer_test.go index bc9ae7d54..bc2f1e16c 100644 --- a/core/go/internal/privatetxnmgr/sequencer_test.go +++ b/core/go/internal/privatetxnmgr/sequencer_test.go @@ -97,8 +97,8 @@ func newSequencerForTesting(t *testing.T, ctx context.Context, domainAddress *tk mocks.stateStore.On("NewDomainContext", mock.Anything, mocks.domain, *domainAddress, mock.Anything).Return(mocks.domainContext).Maybe() //mocks.domain.On("Configuration").Return(&prototk.DomainConfig{}).Maybe() - syncPoints := syncpoints.NewSyncPoints(ctx, &pldconf.FlushWriterConfig{}, p, mocks.txManager, mocks.pubTxManager) - o, err := 
NewSequencer(ctx, mocks.privateTxManager, tktypes.RandHex(16), *domainAddress, &pldconf.PrivateTxManagerSequencerConfig{}, mocks.allComponents, mocks.domainSmartContract, mocks.endorsementGatherer, mocks.publisher, syncPoints, mocks.identityResolver, mocks.stateDistributer, mocks.preparedTransactionDistributer, mocks.transportWriter, 30*time.Second, 0) + syncPoints := syncpoints.NewSyncPoints(ctx, &pldconf.FlushWriterConfig{}, p, mocks.txManager, mocks.pubTxManager, mocks.transportManager) + o, err := NewSequencer(ctx, mocks.privateTxManager, tktypes.RandHex(16), *domainAddress, &pldconf.PrivateTxManagerSequencerConfig{}, mocks.allComponents, mocks.domainSmartContract, mocks.endorsementGatherer, mocks.publisher, syncPoints, mocks.identityResolver, mocks.transportWriter, 30*time.Second, 0) require.NoError(t, err) ocDone, err := o.Start(ctx) require.NoError(t, err) diff --git a/core/go/internal/privatetxnmgr/state_distribution_builder.go b/core/go/internal/privatetxnmgr/state_distribution_builder.go index f02e98e10..edfae0759 100644 --- a/core/go/internal/privatetxnmgr/state_distribution_builder.go +++ b/core/go/internal/privatetxnmgr/state_distribution_builder.go @@ -18,7 +18,6 @@ package privatetxnmgr import ( "context" - "github.com/google/uuid" "github.com/hyperledger/firefly-common/pkg/i18n" "github.com/kaleido-io/paladin/core/internal/components" "github.com/kaleido-io/paladin/core/internal/msgs" @@ -79,15 +78,16 @@ func (sd *stateDistributionBuilder) processStateForDistribution(ctx context.Cont remainingNullifiers = newRemainingNullifiers distribution := &components.StateDistributionWithData{ - ID: uuid.New().String(), - IdentityLocator: recipient, - Domain: tx.Domain, - ContractAddress: tx.Address.String(), - // the state data json is available on both but we take it - // from the outputState to make sure it is the same json that was used to generate the hash - StateID: fullState.ID.String(), - SchemaID: fullState.Schema.String(), - StateDataJson: string(fullState.Data), + StateDistribution: components.StateDistribution{ + IdentityLocator: recipient, + Domain: tx.Domain, + ContractAddress: tx.Address.String(), + // the state data json is available on both but we take it + // from the outputState to make sure it is the same json that was used to generate the hash + StateID: fullState.ID.String(), + SchemaID: fullState.Schema.String(), + }, + StateData: fullState.Data, } // Add the nullifier requirement if there is one diff --git a/core/go/internal/privatetxnmgr/state_distribution_builder_test.go b/core/go/internal/privatetxnmgr/state_distribution_builder_test.go index 8b5c6953f..9ab43b36e 100644 --- a/core/go/internal/privatetxnmgr/state_distribution_builder_test.go +++ b/core/go/internal/privatetxnmgr/state_distribution_builder_test.go @@ -80,21 +80,19 @@ func TestStateDistributionBuilderAllSenderNoNullifiers(t *testing.T) { assert.Empty(t, sds.Remote) require.Len(t, sds.Local, 2) - assert.NotEmpty(t, sds.Local[0].ID) assert.Equal(t, "sender@node1", sds.Local[0].IdentityLocator) assert.Equal(t, "domain1", sds.Local[0].Domain) assert.Equal(t, contractAddr.String(), sds.Local[0].ContractAddress) assert.Equal(t, state1ID.String(), sds.Local[0].StateID) assert.Equal(t, schema1ID.String(), sds.Local[0].SchemaID) - assert.JSONEq(t, `{"state":"1"}`, sds.Local[0].StateDataJson) + assert.JSONEq(t, `{"state":"1"}`, sds.Local[0].StateData.String()) - assert.NotEmpty(t, sds.Local[1].ID) assert.Equal(t, "sender@node1", sds.Local[1].IdentityLocator) assert.Equal(t, "domain1", sds.Local[1].Domain) 
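	// StateDataJson (string) has been replaced by the typed StateData field, so the
	// JSON assertions below compare via StateData.String().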
assert.Equal(t, contractAddr.String(), sds.Local[1].ContractAddress) assert.Equal(t, state2ID.String(), sds.Local[1].StateID) assert.Equal(t, schema2ID.String(), sds.Local[1].SchemaID) - assert.JSONEq(t, `{"state":"2"}`, sds.Local[1].StateDataJson) + assert.JSONEq(t, `{"state":"2"}`, sds.Local[1].StateData.String()) } func TestStateDistributionWithNullifiersAllRemote(t *testing.T) { @@ -171,24 +169,21 @@ func TestStateDistributionWithNullifiersAllRemote(t *testing.T) { } // Bob gets his change on node2 - assert.NotEmpty(t, sds.Remote[0].ID) assert.Equal(t, "bob@node2", sds.Remote[0].IdentityLocator) assert.Equal(t, state1ID.String(), sds.Remote[0].StateID) - assert.JSONEq(t, `{"coin":"with change back to bob"}`, sds.Remote[0].StateDataJson) + assert.JSONEq(t, `{"coin":"with change back to bob"}`, sds.Remote[0].StateData.String()) checkCommon(sds.Remote[0], true) // Sally gets her coin - assert.NotEmpty(t, sds.Remote[1].ID) assert.Equal(t, "sally@node3", sds.Remote[1].IdentityLocator) assert.Equal(t, state2ID.String(), sds.Remote[1].StateID) - assert.JSONEq(t, `{"coin":"with value for sally"}`, sds.Remote[2].StateDataJson) + assert.JSONEq(t, `{"coin":"with value for sally"}`, sds.Remote[2].StateData.String()) checkCommon(sds.Remote[1], true) // Bob also gets sally's coin - but without a nullifier spec - assert.NotEmpty(t, sds.Remote[2].ID) assert.Equal(t, "bob@node2", sds.Remote[2].IdentityLocator) assert.Equal(t, state2ID.String(), sds.Remote[2].StateID) - assert.JSONEq(t, `{"coin":"with value for sally"}`, sds.Remote[2].StateDataJson) + assert.JSONEq(t, `{"coin":"with value for sally"}`, sds.Remote[2].StateData.String()) checkCommon(sds.Remote[2], false) } diff --git a/core/go/internal/privatetxnmgr/syncpoints/dispatch.go b/core/go/internal/privatetxnmgr/syncpoints/dispatch.go index 9aeaab96d..cbd50915b 100644 --- a/core/go/internal/privatetxnmgr/syncpoints/dispatch.go +++ b/core/go/internal/privatetxnmgr/syncpoints/dispatch.go @@ -28,10 +28,10 @@ import ( ) type dispatchOperation struct { - publicDispatches []*PublicDispatch - privateDispatches []*components.ValidatedTransaction - preparedTransactions []*components.PreparedTransactionWithRefs - preparedTxnDistributions []*components.PreparedTransactionWithRefs + publicDispatches []*PublicDispatch + privateDispatches []*components.ValidatedTransaction + preparedTransactions []*components.PreparedTransactionWithRefs + preparedReliableMsgs []*components.ReliableMessage } type DispatchPersisted struct { @@ -57,19 +57,26 @@ type DispatchBatch struct { // PersistDispatches persists the dispatches to the database and coordinates with the public transaction manager // to submit public transactions. 
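 // With the move to reliable messages, the state and prepared-transaction distributions
 // are handed to the transport manager (SendReliable) within the same database transaction.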
-func (s *syncPoints) PersistDispatchBatch(dCtx components.DomainContext, contractAddress tktypes.EthAddress, dispatchBatch *DispatchBatch, stateDistributions []*components.StateDistributionWithData, preparedTxnDistributions []*components.PreparedTransactionWithRefs) error { +func (s *syncPoints) PersistDispatchBatch(dCtx components.DomainContext, contractAddress tktypes.EthAddress, dispatchBatch *DispatchBatch, stateDistributions []*components.StateDistribution, preparedTxnDistributions []*components.PreparedTransactionWithRefs) error { + + preparedReliableMsgs := make([]*components.ReliableMessage, 0, + len(dispatchBatch.PreparedTransactions)+len(stateDistributions)) - preparedTxnDistributionsPersisted := make([]*components.ReliableMessage, 0, len(dispatchBatch.PreparedTransactions)) for _, preparedTxnDistribution := range preparedTxnDistributions { - preparedTxnDistributionsPersisted = append(preparedTxnDistributionsPersisted, &components.ReliableMessage{ - MessageType: components.RMTPreparedTransaction.Enum(), - Node: "node2", - Metadata: tktypes.JSONString(sds[i]), - ID: preparedTxnDistribution.ID, - PreparedTxnID: preparedTxnDistribution.PreparedTxnID, - IdentityLocator: preparedTxnDistribution.IdentityLocator, - DomainName: preparedTxnDistribution.Domain, - ContractAddress: preparedTxnDistribution.ContractAddress, + node, _ := tktypes.PrivateIdentityLocator(preparedTxnDistribution.Transaction.From).Node(dCtx.Ctx(), false) + preparedReliableMsgs = append(preparedReliableMsgs, &components.ReliableMessage{ + Node: node, + MessageType: components.RMTPreparedTransaction.Enum(), + Metadata: tktypes.JSONString(preparedTxnDistribution), + }) + } + + for _, stateDistribution := range stateDistributions { + node, _ := tktypes.PrivateIdentityLocator(stateDistribution.IdentityLocator).Node(dCtx.Ctx(), false) + preparedReliableMsgs = append(preparedReliableMsgs, &components.ReliableMessage{ + Node: node, + MessageType: components.RMTState.Enum(), + Metadata: tktypes.JSONString(stateDistribution), }) } @@ -78,10 +85,10 @@ func (s *syncPoints) PersistDispatchBatch(dCtx components.DomainContext, contrac domainContext: dCtx, contractAddress: contractAddress, dispatchOperation: &dispatchOperation{ - publicDispatches: dispatchBatch.PublicDispatches, - privateDispatches: dispatchBatch.PrivateDispatches, - preparedTransactions: dispatchBatch.PreparedTransactions, - preparedTxnDistributions: preparedTxnDistributionsPersisted, + publicDispatches: dispatchBatch.PublicDispatches, + privateDispatches: dispatchBatch.PrivateDispatches, + preparedTransactions: dispatchBatch.PreparedTransactions, + preparedReliableMsgs: preparedReliableMsgs, }, }) @@ -181,49 +188,17 @@ func (s *syncPoints) writeDispatchOperations(ctx context.Context, dbTX *gorm.DB, postCommits = append(postCommits, txPostCommit) } - if len(op.preparedTxnDistributions) == 0 { - log.L(ctx).Debug("No prepared transaction distributions to persist") - } else { - - log.L(ctx).Debugf("Writing distribution record to send prepared transaction to remote node %d", len(op.preparedTxnDistributions)) - err := dbTX. - Table("prepared_txn_distributions"). - Clauses(clause.OnConflict{ - Columns: []clause.Column{ - {Name: "prepared_txn_id"}, - {Name: "identity_locator"}, - }, - DoNothing: true, // immutable - }). - Create(op.preparedTxnDistributions). 
- Error - - if err != nil { - log.L(ctx).Errorf("Error persisting prepared transaction distributions: %s", err) - return nil, err - } - } - - if len(op.stateDistributions) == 0 { - log.L(ctx).Debug("No state distributions to persist") + if len(op.preparedReliableMsgs) == 0 { + log.L(ctx).Debug("No prepared reliable messages to persist") } else { - log.L(ctx).Debugf("Writing state distributions %d", len(op.stateDistributions)) - err := dbTX. - Table("state_distributions"). - Clauses(clause.OnConflict{ - Columns: []clause.Column{ - {Name: "state_id"}, - {Name: "identity_locator"}, - }, - DoNothing: true, // immutable - }). - Create(op.stateDistributions). - Error + log.L(ctx).Debugf("Writing %d reliable messages", len(op.preparedReliableMsgs)) + msgPostCommit, err := s.transportMgr.SendReliable(ctx, dbTX, op.preparedReliableMsgs...) if err != nil { - log.L(ctx).Errorf("Error persisting state distributions: %s", err) + log.L(ctx).Errorf("Error persisting prepared reliable messages: %s", err) return nil, err } + postCommits = append(postCommits, msgPostCommit) } } diff --git a/core/go/internal/privatetxnmgr/syncpoints/syncpoints.go b/core/go/internal/privatetxnmgr/syncpoints/syncpoints.go index c01803722..c9ef43a44 100644 --- a/core/go/internal/privatetxnmgr/syncpoints/syncpoints.go +++ b/core/go/internal/privatetxnmgr/syncpoints/syncpoints.go @@ -50,7 +50,7 @@ type SyncPoints interface { // to the PrivateTxnManager's persistence store in the same database transaction // Although the actual persistence is offloaded to the flushwriter, this method is synchronous and will block until the // dispatch sequence is written to the database - PersistDispatchBatch(dCtx components.DomainContext, contractAddress tktypes.EthAddress, dispatchBatch *DispatchBatch, stateDistributions []*components.StateDistributionWithData, preparedTxnDistributions []*components.PreparedTransactionWithRefs) error + PersistDispatchBatch(dCtx components.DomainContext, contractAddress tktypes.EthAddress, dispatchBatch *DispatchBatch, stateDistributions []*components.StateDistribution, preparedTxnDistributions []*components.PreparedTransactionWithRefs) error // Deploy is a special case of dispatch batch, where there are no private states, so no domain context is required PersistDeployDispatchBatch(ctx context.Context, dispatchBatch *DispatchBatch) error @@ -64,16 +64,18 @@ type SyncPoints interface { } type syncPoints struct { - started bool - writer flushwriter.Writer[*syncPointOperation, *noResult] - txMgr components.TXManager - pubTxMgr components.PublicTxManager + started bool + writer flushwriter.Writer[*syncPointOperation, *noResult] + txMgr components.TXManager + pubTxMgr components.PublicTxManager + transportMgr components.TransportManager } -func NewSyncPoints(ctx context.Context, conf *pldconf.FlushWriterConfig, p persistence.Persistence, txMgr components.TXManager, pubTxMgr components.PublicTxManager) SyncPoints { +func NewSyncPoints(ctx context.Context, conf *pldconf.FlushWriterConfig, p persistence.Persistence, txMgr components.TXManager, pubTxMgr components.PublicTxManager, transportMgr components.TransportManager) SyncPoints { s := &syncPoints{ - txMgr: txMgr, - pubTxMgr: pubTxMgr, + txMgr: txMgr, + pubTxMgr: pubTxMgr, + transportMgr: transportMgr, } s.writer = flushwriter.NewWriter(ctx, s.runBatch, p, conf, &WriterConfigDefaults) return s diff --git a/core/go/internal/privatetxnmgr/syncpoints/syncpoints_test.go b/core/go/internal/privatetxnmgr/syncpoints/syncpoints_test.go index 2847a7e3e..b2343f58e
100644 --- a/core/go/internal/privatetxnmgr/syncpoints/syncpoints_test.go +++ b/core/go/internal/privatetxnmgr/syncpoints/syncpoints_test.go @@ -25,19 +25,22 @@ import ( ) type dependencyMocks struct { - persistence *mockpersistence.SQLMockProvider - txMgr *componentmocks.TXManager + persistence *mockpersistence.SQLMockProvider + txMgr *componentmocks.TXManager + transportMgr *componentmocks.TransportManager } func newSyncPointsForTesting(t *testing.T) (*syncPoints, *dependencyMocks) { p, err := mockpersistence.NewSQLMockProvider() require.NoError(t, err) mocks := &dependencyMocks{ - persistence: p, - txMgr: componentmocks.NewTXManager(t), + persistence: p, + txMgr: componentmocks.NewTXManager(t), + transportMgr: componentmocks.NewTransportManager(t), } return &syncPoints{ - txMgr: mocks.txMgr, + txMgr: mocks.txMgr, + transportMgr: mocks.transportMgr, }, mocks } diff --git a/core/go/internal/privatetxnmgr/transaction_flow_mutators.go b/core/go/internal/privatetxnmgr/transaction_flow_mutators.go index cb54c4e67..9b14c99d2 100644 --- a/core/go/internal/privatetxnmgr/transaction_flow_mutators.go +++ b/core/go/internal/privatetxnmgr/transaction_flow_mutators.go @@ -99,7 +99,7 @@ func (tf *transactionFlow) applyTransactionAssembledEvent(ctx context.Context, e revertReason = *event.PostAssembly.RevertReason } tf.revertTransaction(ctx, i18n.ExpandWithCode(ctx, i18n.MessageKey(msgs.MsgPrivateTxManagerAssembleRevert), revertReason)) - tf.assembleCoordinator.Complete(event.AssembleRequestID, nil) + tf.assembleCoordinator.Complete(event.AssembleRequestID) return } if tf.transaction.PostAssembly.AssemblyResult == prototk.AssembleTransactionResponse_PARK { @@ -107,13 +107,13 @@ func (tf *transactionFlow) applyTransactionAssembledEvent(ctx context.Context, e log.L(ctx).Infof("AssemblyResult is AssembleTransactionResponse_PARK") tf.status = "parked" tf.assemblePending = false - tf.assembleCoordinator.Complete(event.AssembleRequestID, nil) + tf.assembleCoordinator.Complete(event.AssembleRequestID) return } tf.status = "assembled" tf.writeAndLockStates(ctx) //allow assembly thread to proceed - sds, err := tf.GetStateDistributions(ctx) + _, err := tf.GetStateDistributions(ctx) if err != nil { log.L(ctx).Errorf("Error getting state distributions: %s", err) // we need to proceed with unblocking the assembleCoordinator. 
It won't have a chance to distribute the states to the remote assembler nodes @@ -122,7 +122,7 @@ func (tf *transactionFlow) applyTransactionAssembledEvent(ctx context.Context, e // all transactions if they are valid } - tf.assembleCoordinator.Complete(event.AssembleRequestID, sds.Remote) + tf.assembleCoordinator.Complete(event.AssembleRequestID) } @@ -132,7 +132,7 @@ func (tf *transactionFlow) applyTransactionAssembleFailedEvent(ctx context.Conte tf.latestError = event.Error // set assemblePending to false so that the transaction can be re-assembled tf.assemblePending = false - tf.assembleCoordinator.Complete(event.AssembleRequestID, nil) + tf.assembleCoordinator.Complete(event.AssembleRequestID) } func (tf *transactionFlow) applyTransactionSignedEvent(ctx context.Context, event *ptmgrtypes.TransactionSignedEvent) { diff --git a/core/go/internal/privatetxnmgr/transaction_flow_test.go b/core/go/internal/privatetxnmgr/transaction_flow_test.go index dee921239..c43ec4955 100644 --- a/core/go/internal/privatetxnmgr/transaction_flow_test.go +++ b/core/go/internal/privatetxnmgr/transaction_flow_test.go @@ -89,7 +89,7 @@ func newTransactionFlowForTesting(t *testing.T, ctx context.Context, transaction domain.On("Configuration").Return(&prototk.DomainConfig{}).Maybe() mocks.domainSmartContract.On("Domain").Return(domain).Maybe() - assembleCoordinator := NewAssembleCoordinator(ctx, nodeName, 1, mocks.allComponents, mocks.domainSmartContract, mocks.domainContext, mocks.transportWriter, *contractAddress, mocks.environment, 1*time.Second, mocks.stateDistributer, mocks.localAssembler) + assembleCoordinator := NewAssembleCoordinator(ctx, nodeName, 1, mocks.allComponents, mocks.domainSmartContract, mocks.domainContext, mocks.transportWriter, *contractAddress, mocks.environment, 1*time.Second, mocks.localAssembler) tp := NewTransactionFlow(ctx, transaction, nodeName, mocks.allComponents, mocks.domainSmartContract, mocks.domainContext, mocks.publisher, mocks.endorsementGatherer, mocks.identityResolver, mocks.syncPoints, mocks.transportWriter, 1*time.Minute, mocks.coordinatorSelector, assembleCoordinator, mocks.environment) diff --git a/core/go/internal/privatetxnmgr/transport_receiver.go b/core/go/internal/privatetxnmgr/transport_receiver.go index 4889d0d4a..ca20bc424 100644 --- a/core/go/internal/privatetxnmgr/transport_receiver.go +++ b/core/go/internal/privatetxnmgr/transport_receiver.go @@ -22,33 +22,29 @@ import ( "github.com/kaleido-io/paladin/toolkit/pkg/log" ) -func (p *privateTxManager) HandlePaladinMsg(ctx context.Context, message *components.TransportMessage) { +func (p *privateTxManager) HandlePaladinMsg(ctx context.Context, message *components.ReceivedMessage) { //TODO this needs to become an ultra low latency, non blocking, handover to the event loop thread. // need some thought on how to handle errors, retries, buffering, swapping idle sequencers in and out of memory etc...
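	// For illustration only - one possible non-blocking handover shape, assuming a bounded
	// channel drained by the event loop ("inboundEvents" is a hypothetical field, not part
	// of this codebase), which would then own errors and retries:
	//
	//	select {
	//	case p.inboundEvents <- message:
	//		// handed over to the event loop thread
	//	default:
	//		log.L(ctx).Warnf("inbound queue full, dropping %s message", message.MessageType)
	//	}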
//Send the event to the sequencer for the contract and any transaction manager for the signing key messagePayload := message.Payload - replyToDestination := message.ReplyTo + fromNode := message.FromNode switch message.MessageType { case "EndorsementRequest": - go p.handleEndorsementRequest(p.ctx, messagePayload, replyToDestination) + go p.handleEndorsementRequest(p.ctx, messagePayload, fromNode) case "EndorsementResponse": go p.handleEndorsementResponse(p.ctx, messagePayload) case "DelegationRequest": - go p.handleDelegationRequest(p.ctx, messagePayload, replyToDestination) + go p.handleDelegationRequest(p.ctx, messagePayload, fromNode) case "DelegationRequestAcknowledgment": go p.handleDelegationRequestAcknowledgment(p.ctx, messagePayload) case "AssembleRequest": - go p.handleAssembleRequest(p.ctx, messagePayload, replyToDestination) + go p.handleAssembleRequest(p.ctx, messagePayload, fromNode) case "AssembleResponse": go p.handleAssembleResponse(p.ctx, messagePayload) case "AssembleError": go p.handleAssembleError(p.ctx, messagePayload) - case "StateProducedEvent": - go p.handleStateProducedEvent(p.ctx, messagePayload, replyToDestination) - case "StateAcknowledgedEvent": - go p.stateDistributer.HandleStateAcknowledgedEvent(p.ctx, message.Payload) default: log.L(ctx).Errorf("Unknown message type: %s", message.MessageType) } diff --git a/core/go/internal/privatetxnmgr/transport_writer.go b/core/go/internal/privatetxnmgr/transport_writer.go index 86eff9565..b89055d6b 100644 --- a/core/go/internal/privatetxnmgr/transport_writer.go +++ b/core/go/internal/privatetxnmgr/transport_writer.go @@ -73,12 +73,11 @@ func (tw *transportWriter) SendDelegationRequest( return err } - if err = tw.transportManager.Send(ctx, &components.TransportMessage{ + if err = tw.transportManager.Send(ctx, &components.FireAndForgetMessageSend{ MessageType: "DelegationRequest", Payload: delegationRequestBytes, - Component: components.PRIVATE_TX_MANAGER_DESTINATION, + Component: prototk.PaladinMsg_TRANSACTION_ENGINE, Node: delegateNodeId, - ReplyTo: tw.nodeID, }); err != nil { return err } @@ -106,12 +105,11 @@ func (tw *transportWriter) SendDelegationRequestAcknowledgment( return err } - if err = tw.transportManager.Send(ctx, &components.TransportMessage{ + if err = tw.transportManager.Send(ctx, &components.FireAndForgetMessageSend{ MessageType: "DelegationRequestAcknowledgment", Payload: delegationRequestAcknowledgmentBytes, - Component: components.PRIVATE_TX_MANAGER_DESTINATION, + Component: prototk.PaladinMsg_TRANSACTION_ENGINE, Node: delegatingNodeName, - ReplyTo: tw.nodeID, }); err != nil { return err } @@ -202,11 +200,10 @@ func (tw *transportWriter) SendEndorsementRequest(ctx context.Context, idempoten log.L(ctx).Error("Error marshalling endorsement request", err) return err } - err = tw.transportManager.Send(ctx, &components.TransportMessage{ + err = tw.transportManager.Send(ctx, &components.FireAndForgetMessageSend{ MessageType: "EndorsementRequest", Node: targetNode, - Component: components.PRIVATE_TX_MANAGER_DESTINATION, - ReplyTo: tw.nodeID, + Component: prototk.PaladinMsg_TRANSACTION_ENGINE, Payload: endorsementRequestBytes, }) return err @@ -233,11 +230,10 @@ func (tw *transportWriter) SendAssembleRequest(ctx context.Context, assemblingNo log.L(ctx).Error("Error marshalling assemble request", err) return err } - err = tw.transportManager.Send(ctx, &components.TransportMessage{ + err = tw.transportManager.Send(ctx, &components.FireAndForgetMessageSend{ MessageType: "AssembleRequest", Node: assemblingNode, - 
Component: components.PRIVATE_TX_MANAGER_DESTINATION, - ReplyTo: tw.nodeID, + Component: prototk.PaladinMsg_TRANSACTION_ENGINE, Payload: assembleRequestBytes, }) return err diff --git a/core/go/internal/transportmgr/manager.go b/core/go/internal/transportmgr/manager.go index 914a141af..103482491 100644 --- a/core/go/internal/transportmgr/manager.go +++ b/core/go/internal/transportmgr/manager.go @@ -280,22 +280,31 @@ func (tm *transportManager) queueFireAndForget(ctx context.Context, nodeName str } // See docs in components package -func (tm *transportManager) SendReliable(ctx context.Context, dbTX *gorm.DB, msg *components.ReliableMessage) (preCommit func(), err error) { +func (tm *transportManager) SendReliable(ctx context.Context, dbTX *gorm.DB, msgs ...*components.ReliableMessage) (preCommit func(), err error) { - var p *peer + peers := make(map[string]*peer) + for _, msg := range msgs { + var p *peer - msg.ID = uuid.New() - msg.Created = tktypes.TimestampNow() - _, err = msg.MessageType.Validate() + msg.ID = uuid.New() + msg.Created = tktypes.TimestampNow() + _, err = msg.MessageType.Validate() - if err == nil { - p, err = tm.getPeer(ctx, msg.Node, true) + if err == nil { + p, err = tm.getPeer(ctx, msg.Node, true) + } + + if err != nil { + return nil, err + } + + peers[p.Name] = p } if err == nil { err = dbTX. WithContext(ctx). - Create(msg). + Create(msgs). Error } @@ -303,7 +312,11 @@ func (tm *transportManager) SendReliable(ctx context.Context, dbTX *gorm.DB, msg return nil, err } - return p.notifyPersistedMsgAvailable, nil + return func() { + for _, p := range peers { + p.notifyPersistedMsgAvailable() + } + }, nil } diff --git a/core/go/internal/transportmgr/manager_test.go b/core/go/internal/transportmgr/manager_test.go index 44b15ec09..7dccc30d3 100644 --- a/core/go/internal/transportmgr/manager_test.go +++ b/core/go/internal/transportmgr/manager_test.go @@ -22,6 +22,7 @@ import ( "github.com/google/uuid" "github.com/kaleido-io/paladin/config/pkg/pldconf" + "github.com/kaleido-io/paladin/core/internal/components" "github.com/kaleido-io/paladin/core/mocks/componentmocks" "github.com/kaleido-io/paladin/core/pkg/persistence" "github.com/kaleido-io/paladin/core/pkg/persistence/mockpersistence" @@ -187,3 +188,13 @@ func TestGetLocalTransportDetailsNotFail(t *testing.T) { _, err := tm.getLocalTransportDetails(ctx, tp.t.name) assert.Regexp(t, "pop", err) } + +func TestSendReliableBadMsg(t *testing.T) { + ctx, tm, _, done := newTestTransport(t, false) + defer done() + + _, err := tm.SendReliable(ctx, tm.persistence.DB(), &components.ReliableMessage{ + MessageType: components.RMTReceipt.Enum(), + }) + assert.Regexp(t, "PD012015", err) +} diff --git a/core/go/pkg/proto/engine.proto b/core/go/pkg/proto/engine.proto index 0f75dc315..99f8f52e8 100644 --- a/core/go/pkg/proto/engine.proto +++ b/core/go/pkg/proto/engine.proto @@ -96,20 +96,6 @@ message DelegationRequestAcknowledgment { string contract_address = 4; } -//To be distrubuted to all parties mentioned in the distribution list for a state, as chosen by the domain -message StateProducedEvent { - string state_id = 1; - string state_data_json = 2; - string party = 3; - string domain_name = 4; - string contract_address = 5; - string schema_id = 6; - string distribution_id = 7; //this is used to correlate the acknowledgement back to the distribution. 
unlike the transport message id / correlation id, this is not unique across retries - optional string nullifier_algorithm = 8; - optional string nullifier_verifier_type = 9; - optional string nullifier_payload_type = 10; -} - message StateAcknowledgedEvent { string state_id = 1; string state_data_json = 2; From f36b2f81183fbe2f6faf7d97b1fca10a747fa28b Mon Sep 17 00:00:00 2001 From: Peter Broadhurst Date: Sun, 5 Jan 2025 15:49:51 -0500 Subject: [PATCH 28/41] Fix migrations Signed-off-by: Peter Broadhurst --- .../migrations/postgres/000014_peer_queued_messages.up.sql | 5 ++--- .../db/migrations/sqlite/000014_peer_queued_messages.up.sql | 5 +---- core/go/internal/privatetxnmgr/private_txn_mgr_test.go | 2 +- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql b/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql index 964b85731..3dfe2a27f 100644 --- a/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql +++ b/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql @@ -12,8 +12,7 @@ CREATE TABLE reliable_msgs ( "created" BIGINT NOT NULL, "node" TEXT NOT NULL, "msg_type" TEXT NOT NULL, - "metadata" TEXT , - PRIMARY KEY ("id") + "metadata" TEXT ); CREATE INDEX reliable_msgs_id ON reliable_msgs ("id"); @@ -28,4 +27,4 @@ CREATE TABLE reliable_msg_acks ( FOREIGN KEY ("id") REFERENCES reliable_msgs ("id") ON DELETE CASCADE ); - +COMMIT; \ No newline at end of file diff --git a/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql b/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql index 6345de101..0eaa8c6fe 100644 --- a/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql +++ b/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql @@ -1,5 +1,3 @@ -BEGIN; - -- These tables are replaced (data is not migrated from initial state distribution specific implementation) DROP TABLE state_distribution_acknowledgments; DROP TABLE state_distributions; @@ -12,8 +10,7 @@ CREATE TABLE reliable_msgs ( "created" BIGINT NOT NULL, "node" TEXT NOT NULL, "msg_type" TEXT NOT NULL, - "metadata" TEXT , - PRIMARY KEY ("id") + "metadata" TEXT ); CREATE INDEX reliable_msgs_id ON reliable_msgs ("id"); diff --git a/core/go/internal/privatetxnmgr/private_txn_mgr_test.go b/core/go/internal/privatetxnmgr/private_txn_mgr_test.go index b7b3a8eb4..8d3b853db 100644 --- a/core/go/internal/privatetxnmgr/private_txn_mgr_test.go +++ b/core/go/internal/privatetxnmgr/private_txn_mgr_test.go @@ -864,7 +864,7 @@ func TestPrivateTxManagerRemoteNotaryEndorser(t *testing.T) { localNodeMocks.transportManager.On("Send", mock.Anything, mock.Anything).Run(func(args mock.Arguments) { go func() { - assert.Equal(t, remoteNodeName, args.Get(1).(*components.ReceivedMessage).FromNode) + assert.Equal(t, remoteNodeName, args.Get(1).(*components.FireAndForgetMessageSend).Node) send := args.Get(1).(*components.FireAndForgetMessageSend) remoteEngine.HandlePaladinMsg(ctx, inMsgToOut(localNodeName, send)) }() From 0747909ef32d219b143c538a4d95e820a040e039 Mon Sep 17 00:00:00 2001 From: Peter Broadhurst Date: Sun, 5 Jan 2025 16:13:27 -0500 Subject: [PATCH 29/41] Allow sender to specify message ID for correlation Signed-off-by: Peter Broadhurst --- core/go/internal/components/transportmgr.go | 1 + core/go/internal/identityresolver/identityresolver.go | 1 + core/go/internal/transportmgr/manager.go | 6 +++++- core/go/internal/transportmgr/peer.go | 3 +++ 4 files changed, 10 insertions(+), 1 deletion(-) diff --git 
a/core/go/internal/components/transportmgr.go b/core/go/internal/components/transportmgr.go index e2aa7058f..f89af1237 100644 --- a/core/go/internal/components/transportmgr.go +++ b/core/go/internal/components/transportmgr.go @@ -30,6 +30,7 @@ import ( type FireAndForgetMessageSend struct { Node string Component prototk.PaladinMsg_Component + MessageID *uuid.UUID // optionally supplied by caller for request/reply correlation CorrelationID *uuid.UUID MessageType string Payload []byte diff --git a/core/go/internal/identityresolver/identityresolver.go b/core/go/internal/identityresolver/identityresolver.go index bd48ac3bd..1177304db 100644 --- a/core/go/internal/identityresolver/identityresolver.go +++ b/core/go/internal/identityresolver/identityresolver.go @@ -174,6 +174,7 @@ func (ir *identityResolver) ResolveVerifierAsync(ctx context.Context, lookup str } err = ir.transportManager.Send(ctx, &components.FireAndForgetMessageSend{ + MessageID: &requestID, MessageType: "ResolveVerifierRequest", Component: prototk.PaladinMsg_IDENTITY_RESOLVER, Node: remoteNodeId, diff --git a/core/go/internal/transportmgr/manager.go b/core/go/internal/transportmgr/manager.go index 103482491..ebe23aad0 100644 --- a/core/go/internal/transportmgr/manager.go +++ b/core/go/internal/transportmgr/manager.go @@ -241,8 +241,12 @@ func (tm *transportManager) Send(ctx context.Context, send *components.FireAndFo return i18n.NewError(ctx, msgs.MsgTransportInvalidMessage) } + if send.MessageID == nil { + msgID := uuid.New() + send.MessageID = &msgID + } msg := &prototk.PaladinMsg{ - MessageId: uuid.NewString(), + MessageId: send.MessageID.String(), MessageType: send.MessageType, Component: send.Component, Payload: send.Payload, diff --git a/core/go/internal/transportmgr/peer.go b/core/go/internal/transportmgr/peer.go index c15bfab40..6fae3fac5 100644 --- a/core/go/internal/transportmgr/peer.go +++ b/core/go/internal/transportmgr/peer.go @@ -259,6 +259,7 @@ func (p *peer) send(msg *prototk.PaladinMsg, reliableSeq *uint64) error { err := p.tm.sendShortRetry.Do(p.ctx, func(attempt int) (retryable bool, err error) { return true, p.transport.send(p.ctx, p.Name, msg) }) + log.L(p.ctx).Infof("Sent %s/%s message %s to %s (cid=%s)", msg.Component.String(), msg.MessageType, msg.MessageId, p.Name, tktypes.StrOrEmpty(msg.CorrelationId)) if err == nil { now := tktypes.TimestampNow() p.statsLock.Lock() @@ -277,6 +278,8 @@ func (p *peer) send(msg *prototk.PaladinMsg, reliableSeq *uint64) error { } func (p *peer) updateReceivedStats(msg *prototk.PaladinMsg) { + log.L(p.ctx).Infof("Received %s/%s message %s from %s (cid=%s)", msg.Component.String(), msg.MessageType, msg.MessageId, p.Name, tktypes.StrOrEmpty(msg.CorrelationId)) + now := tktypes.TimestampNow() p.statsLock.Lock() defer p.statsLock.Unlock() From 3393a64559147765482724e39c5c0cfb37396212 Mon Sep 17 00:00:00 2001 From: Peter Broadhurst Date: Sun, 5 Jan 2025 20:14:06 -0500 Subject: [PATCH 30/41] Update DomainContext export/import to be full snapshot with states Signed-off-by: Peter Broadhurst --- core/go/componenttest/component_test.go | 11 +- .../000014_peer_queued_messages.up.sql | 2 +- .../sqlite/000014_peer_queued_messages.up.sql | 2 +- core/go/internal/components/statemgr.go | 14 +- core/go/internal/domainmgr/domain_test.go | 2 +- .../domainmgr/private_smart_contract.go | 22 +- core/go/internal/msgs/en_errors.go | 5 +- .../privatetxnmgr/assemble_and_sign.go | 2 +- .../privatetxnmgr/assemble_coordinator.go | 2 +- .../privatetxnmgr/private_txn_mgr_test.go | 4 +- 
.../privatetxnmgr/sequencer_dispatch.go | 2 + .../privatetxnmgr/syncpoints/dispatch.go | 25 ++- core/go/internal/statemgr/domain_context.go | 73 ++++--- .../internal/statemgr/domain_context_test.go | 200 +++++++++++------- .../go/internal/statemgr/state_status_test.go | 6 +- .../transportmgr/reliable_msg_handler.go | 3 +- toolkit/go/pkg/pldapi/states.go | 2 +- 17 files changed, 226 insertions(+), 151 deletions(-) diff --git a/core/go/componenttest/component_test.go b/core/go/componenttest/component_test.go index f8390324d..a3d74eeb7 100644 --- a/core/go/componenttest/component_test.go +++ b/core/go/componenttest/component_test.go @@ -915,12 +915,17 @@ func TestNotaryDelegatedPrepare(t *testing.T) { assert.Eventually(t, func() bool { - preparedTx := pldapi.PreparedTransaction{} + var preparedTx *pldapi.PreparedTransaction - err = client1.CallRPC(ctx, &preparedTx, "ptx_getPreparedTransaction", transferA2BTxId) + // The transaction is prepared with a from-address that is local to node3 - so only + // node3 will be able to send it. So that's where it gets persisted. + err = client3.CallRPC(ctx, &preparedTx, "ptx_getPreparedTransaction", transferA2BTxId) require.NoError(t, err) - assert.Empty(t, preparedTx.Transaction.Domain) + if preparedTx == nil { + return false + } + assert.Empty(t, preparedTx.Transaction.Domain) return preparedTx.ID == transferA2BTxId && len(preparedTx.States.Spent) == 1 && len(preparedTx.States.Confirmed) == 2 }, diff --git a/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql b/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql index 3dfe2a27f..ca931119e 100644 --- a/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql +++ b/core/go/db/migrations/postgres/000014_peer_queued_messages.up.sql @@ -15,7 +15,7 @@ CREATE TABLE reliable_msgs ( "metadata" TEXT ); -CREATE INDEX reliable_msgs_id ON reliable_msgs ("id"); +CREATE UNIQUE INDEX reliable_msgs_id ON reliable_msgs ("id"); CREATE INDEX reliable_msgs_node ON reliable_msgs ("node"); CREATE INDEX reliable_msgs_created ON reliable_msgs ("created"); diff --git a/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql b/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql index 0eaa8c6fe..edbd548b7 100644 --- a/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql +++ b/core/go/db/migrations/sqlite/000014_peer_queued_messages.up.sql @@ -13,7 +13,7 @@ CREATE TABLE reliable_msgs ( "metadata" TEXT ); -CREATE INDEX reliable_msgs_id ON reliable_msgs ("id"); +CREATE UNIQUE INDEX reliable_msgs_id ON reliable_msgs ("id"); CREATE INDEX reliable_msgs_node ON reliable_msgs ("node"); CREATE INDEX reliable_msgs_created ON reliable_msgs ("created"); diff --git a/core/go/internal/components/statemgr.go b/core/go/internal/components/statemgr.go index 7113d1a45..2f268a9bd 100644 --- a/core/go/internal/components/statemgr.go +++ b/core/go/internal/components/statemgr.go @@ -97,10 +97,10 @@ type DomainContext interface { FindAvailableStates(dbTX *gorm.DB, schemaID tktypes.Bytes32, query *query.QueryJSON) (Schema, []*pldapi.State, error) // Return a snapshot of all currently known state locks - ExportStateLocks() ([]byte, error) + ExportSnapshot() ([]byte, error) - // ImportStateLocks is used to restore the state of the domain context, by adding a set of locks - ImportStateLocks([]byte) error + // ImportSnapshot is used to restore the state of the domain context, by adding a set of locks + ImportSnapshot([]byte) error // FindAvailableNullifiers is similar to FindAvailableStates, 
but for domains that leverage // nullifiers to record spending. @@ -171,10 +171,10 @@ type DomainContext interface { } type StateUpsert struct { - ID tktypes.HexBytes - SchemaID tktypes.Bytes32 - Data tktypes.RawJSON - CreatedBy *uuid.UUID + ID tktypes.HexBytes `json:"id"` + Schema tktypes.Bytes32 `json:"schema"` + Data tktypes.RawJSON `json:"data"` + CreatedBy *uuid.UUID `json:"createdBy,omitempty"` // not exported } type StateUpsertOutsideContext struct { diff --git a/core/go/internal/domainmgr/domain_test.go b/core/go/internal/domainmgr/domain_test.go index 78b6a79d4..585dfa20e 100644 --- a/core/go/internal/domainmgr/domain_test.go +++ b/core/go/internal/domainmgr/domain_test.go @@ -519,7 +519,7 @@ func storeTestState(t *testing.T, td *testDomainContext, txID uuid.UUID, amount // Call the real statestore _, err = td.c.dCtx.UpsertStates(td.c.dbTX, &components.StateUpsert{ - SchemaID: tktypes.MustParseBytes32(td.tp.stateSchemas[0].Id), + Schema: tktypes.MustParseBytes32(td.tp.stateSchemas[0].Id), Data: stateJSON, CreatedBy: &txID, }) diff --git a/core/go/internal/domainmgr/private_smart_contract.go b/core/go/internal/domainmgr/private_smart_contract.go index 1fdeab9dc..f1497f189 100644 --- a/core/go/internal/domainmgr/private_smart_contract.go +++ b/core/go/internal/domainmgr/private_smart_contract.go @@ -310,9 +310,9 @@ func (dc *domainContract) upsertPotentialStates(dCtx components.DomainContext, r } } stateUpsert := &components.StateUpsert{ - ID: id, - SchemaID: schema.ID(), - Data: tktypes.RawJSON(s.StateDataJson), + ID: id, + Schema: schema.ID(), + Data: tktypes.RawJSON(s.StateDataJson), } if isOutput { // These are marked as locked and creating in the transaction, and become available for other transaction to read @@ -366,14 +366,14 @@ func (dc *domainContract) LockStates(dCtx components.DomainContext, readTX *gorm inputIDs := make([]string, len(postAssembly.InputStates)) for i, s := range postAssembly.InputStates { stateLocks = append(stateLocks, &pldapi.StateLock{ - State: s.ID, + StateID: s.ID, DomainName: domainName, Transaction: tx.ID, Type: pldapi.StateLockTypeSpend.Enum(), }) states = append(states, &components.StateUpsert{ ID: s.ID, - SchemaID: s.Schema, + Schema: s.Schema, Data: s.Data, CreatedBy: nil, // we are not responsible for creation of the state }) @@ -382,14 +382,14 @@ func (dc *domainContract) LockStates(dCtx components.DomainContext, readTX *gorm readIDs := make([]string, len(postAssembly.ReadStates)) for i, s := range postAssembly.ReadStates { stateLocks = append(stateLocks, &pldapi.StateLock{ - State: s.ID, + StateID: s.ID, DomainName: domainName, Transaction: tx.ID, Type: pldapi.StateLockTypeRead.Enum(), }) states = append(states, &components.StateUpsert{ ID: s.ID, - SchemaID: s.Schema, + Schema: s.Schema, Data: s.Data, CreatedBy: nil, // we are not responsible for creation of the state }) @@ -401,7 +401,7 @@ func (dc *domainContract) LockStates(dCtx components.DomainContext, readTX *gorm for i, s := range postAssembly.OutputStates { states = append(states, &components.StateUpsert{ ID: s.ID, - SchemaID: s.Schema, + Schema: s.Schema, Data: s.Data, CreatedBy: &tx.ID, // output states have create-locks to the transaction }) @@ -412,9 +412,9 @@ func (dc *domainContract) LockStates(dCtx components.DomainContext, readTX *gorm infoIDs := make([]string, len(postAssembly.InfoStates)) for i, s := range postAssembly.InfoStates { states = append(states, &components.StateUpsert{ - ID: s.ID, - SchemaID: s.Schema, - Data: s.Data, + ID: s.ID, + Schema: s.Schema, + Data: 
s.Data, }) infoIDs[i] = s.ID.String() } diff --git a/core/go/internal/msgs/en_errors.go b/core/go/internal/msgs/en_errors.go index b77a48830..bc4bce2b9 100644 --- a/core/go/internal/msgs/en_errors.go +++ b/core/go/internal/msgs/en_errors.go @@ -106,6 +106,7 @@ var ( MsgStateIDMissing = ffe("PD010130", "The state id must be supplied for this domain") MsgStateFlushInProgress = ffe("PD010131", "A flush is already in progress for this domain context") MsgDomainContextImportInvalidJSON = ffe("PD010132", "Attempted to import state locks but the JSON could not be parsed") + MsgDomainContextImportBadStates = ffe("PD010133", "Attempt to import states failed") // Persistence PD0102XX MsgPersistenceInvalidType = ffe("PD010200", "Invalid persistence type: %s") @@ -126,9 +127,9 @@ var ( MsgTransactionProcessorEmptyAssembledResult = ffe("PD010305", "No transaction was assembled for transaction with ID: %s") // Transaction store PD0104XX - MsgTransactionMissingField = ffe("PD010400", "Must provide a payload (one of PayloadJSON or PayloadRLP), from, and contract address. Mising %v") + MsgTransactionMissingField = ffe("PD010400", "Must provide a payload (one of PayloadJSON or PayloadRLP), from, and contract address. Missing %v") MsgTransactionParseError = ffe("PD010401", "Failed to parse transaction message.") - MsgTransactionSerializeError = ffe("PD010402", "Failed to serialise transaction response.") + MsgTransactionSerializeError = ffe("PD010402", "Failed to serialize transaction response.") MsgTransactionInvalidTransactionID = ffe("PD010403", "The provided ID %s cannot be parsed into a valid UUID due to %s") // Key manager PD0105XX diff --git a/core/go/internal/privatetxnmgr/assemble_and_sign.go b/core/go/internal/privatetxnmgr/assemble_and_sign.go index cf3ac9f5b..84587e334 100644 --- a/core/go/internal/privatetxnmgr/assemble_and_sign.go +++ b/core/go/internal/privatetxnmgr/assemble_and_sign.go @@ -40,7 +40,7 @@ func (s *Sequencer) assembleForRemoteCoordinator(ctx context.Context, transactio // if our block height is ahead of the coordinator, there is a small chance that we assemble a transaction that the coordinator will not be able to // endorse yet but it is better to wait around on the endorsement flow than to wait around on the assemble flow which is single threaded per domain - err := s.delegateDomainContext.ImportStateLocks(stateLocksJSON) + err := s.delegateDomainContext.ImportSnapshot(stateLocksJSON) if err != nil { log.L(ctx).Errorf("assembleForRemoteCoordinator: Error importing state locks: %s", err) return nil, err } diff --git a/core/go/internal/privatetxnmgr/assemble_coordinator.go b/core/go/internal/privatetxnmgr/assemble_coordinator.go index 549b8d81a..e5bef5d15 100644 --- a/core/go/internal/privatetxnmgr/assemble_coordinator.go +++ b/core/go/internal/privatetxnmgr/assemble_coordinator.go @@ -182,7 +182,7 @@ func (req *assembleRequest) processRemote(ctx context.Context, assemblingNode st log.L(ctx).Debugf("assembleRequest:processRemote requestID %s", requestID) - stateLocksJSON, err := req.assembleCoordinator.domainContext.ExportStateLocks() + stateLocksJSON, err := req.assembleCoordinator.domainContext.ExportSnapshot() if err != nil { return err } diff --git a/core/go/internal/privatetxnmgr/private_txn_mgr_test.go b/core/go/internal/privatetxnmgr/private_txn_mgr_test.go index 8d3b853db..f3065a324 100644 --- a/core/go/internal/privatetxnmgr/private_txn_mgr_test.go +++ b/core/go/internal/privatetxnmgr/private_txn_mgr_test.go @@ -2851,8 +2851,8 @@ func
NewPrivateTransactionMgrForPackageTesting(t *testing.T, nodeName string) (p mocks.domainContext.On("Ctx").Return(ctx).Maybe() mocks.domainContext.On("Info").Return(components.DomainContextInfo{ID: uuid.New()}).Maybe() - mocks.domainContext.On("ExportStateLocks").Return([]byte("[]"), nil).Maybe() - mocks.domainContext.On("ImportStateLocks", mock.Anything).Return(nil).Maybe() + mocks.domainContext.On("ExportSnapshot").Return([]byte("[]"), nil).Maybe() + mocks.domainContext.On("ImportSnapshot", mock.Anything).Return(nil).Maybe() e := NewPrivateTransactionMgr(ctx, &pldconf.PrivateTxManagerConfig{ Writer: pldconf.FlushWriterConfig{ diff --git a/core/go/internal/privatetxnmgr/sequencer_dispatch.go b/core/go/internal/privatetxnmgr/sequencer_dispatch.go index 82e35972b..d9318e391 100644 --- a/core/go/internal/privatetxnmgr/sequencer_dispatch.go +++ b/core/go/internal/privatetxnmgr/sequencer_dispatch.go @@ -87,6 +87,8 @@ func (s *Sequencer) DispatchTransactions(ctx context.Context, dispatchableTransa log.L(ctx).Infof("Result of transaction %s is a prepared transaction public=%t private=%t", preparedTransaction.ID, hasPublicTransaction, hasPrivateTransaction) preparedTransactionWithRefs := mapPreparedTransaction(preparedTransaction) dispatchBatch.PreparedTransactions = append(dispatchBatch.PreparedTransactions, preparedTransactionWithRefs) + + // The prepared transaction needs to end up on the node that is able to submit it. preparedTxnDistributions = append(preparedTxnDistributions, preparedTransactionWithRefs) default: diff --git a/core/go/internal/privatetxnmgr/syncpoints/dispatch.go b/core/go/internal/privatetxnmgr/syncpoints/dispatch.go index cbd50915b..aa34238f2 100644 --- a/core/go/internal/privatetxnmgr/syncpoints/dispatch.go +++ b/core/go/internal/privatetxnmgr/syncpoints/dispatch.go @@ -30,7 +30,7 @@ import ( type dispatchOperation struct { publicDispatches []*PublicDispatch privateDispatches []*components.ValidatedTransaction - preparedTransactions []*components.PreparedTransactionWithRefs + localPreparedTxns []*components.PreparedTransactionWithRefs preparedReliableMsgs []*components.ReliableMessage } @@ -62,13 +62,18 @@ func (s *syncPoints) PersistDispatchBatch(dCtx components.DomainContext, contrac preparedReliableMsgs := make([]*components.ReliableMessage, 0, len(dispatchBatch.PreparedTransactions)+len(stateDistributions)) + var localPreparedTxns []*components.PreparedTransactionWithRefs for _, preparedTxnDistribution := range preparedTxnDistributions { node, _ := tktypes.PrivateIdentityLocator(preparedTxnDistribution.Transaction.From).Node(dCtx.Ctx(), false) - preparedReliableMsgs = append(preparedReliableMsgs, &components.ReliableMessage{ - Node: node, - MessageType: components.RMTPreparedTransaction.Enum(), - Metadata: tktypes.JSONString(preparedTxnDistribution), - }) + if node != s.transportMgr.LocalNodeName() { + preparedReliableMsgs = append(preparedReliableMsgs, &components.ReliableMessage{ + Node: node, + MessageType: components.RMTPreparedTransaction.Enum(), + Metadata: tktypes.JSONString(preparedTxnDistribution), + }) + } else { + localPreparedTxns = append(localPreparedTxns, preparedTxnDistribution) + } } for _, stateDistribution := range stateDistributions { @@ -87,7 +92,7 @@ func (s *syncPoints) PersistDispatchBatch(dCtx components.DomainContext, contrac dispatchOperation: &dispatchOperation{ publicDispatches: dispatchBatch.PublicDispatches, privateDispatches: dispatchBatch.PrivateDispatches, - preparedTransactions: dispatchBatch.PreparedTransactions, + localPreparedTxns: 
localPreparedTxns, preparedReliableMsgs: preparedReliableMsgs, }, }) @@ -177,10 +182,10 @@ func (s *syncPoints) writeDispatchOperations(ctx context.Context, dbTX *gorm.DB, postCommits = append(postCommits, txPostCommit) } - if len(op.preparedTransactions) > 0 { - log.L(ctx).Debugf("Writing prepared transactions locally %d", len(op.preparedTransactions)) + if len(op.localPreparedTxns) > 0 { + log.L(ctx).Debugf("Writing prepared transactions locally %d", len(op.localPreparedTxns)) - txPostCommit, err := s.txMgr.WritePreparedTransactions(ctx, dbTX, op.preparedTransactions) + txPostCommit, err := s.txMgr.WritePreparedTransactions(ctx, dbTX, op.localPreparedTxns) if err != nil { log.L(ctx).Errorf("Error persisting prepared transactions: %s", err) return nil, err diff --git a/core/go/internal/statemgr/domain_context.go b/core/go/internal/statemgr/domain_context.go index 25971e57a..b534a87dc 100644 --- a/core/go/internal/statemgr/domain_context.go +++ b/core/go/internal/statemgr/domain_context.go @@ -119,7 +119,7 @@ func (dc *domainContext) getUnFlushedSpends() (spending []tktypes.HexBytes, null for _, l := range dc.txLocks { if l.Type.V() == pldapi.StateLockTypeSpend { - spending = append(spending, l.State) + spending = append(spending, l.StateID) } } nullifiers = append(nullifiers, dc.unFlushed.stateNullifiers...) @@ -150,7 +150,7 @@ func (dc *domainContext) mergeUnFlushedApplyLocks(schema components.Schema, dbSt } spent := false for _, lock := range dc.txLocks { - if lock.State.Equals(state.ID) && lock.Type.V() == pldapi.StateLockTypeSpend { + if lock.StateID.Equals(state.ID) && lock.Type.V() == pldapi.StateLockTypeSpend { spent = true break } @@ -295,13 +295,17 @@ func (dc *domainContext) FindAvailableNullifiers(dbTX *gorm.DB, schemaID tktypes } func (dc *domainContext) UpsertStates(dbTX *gorm.DB, stateUpserts ...*components.StateUpsert) (states []*pldapi.State, err error) { + return dc.upsertStates(dbTX, false, stateUpserts...) 
+} + +func (dc *domainContext) upsertStates(dbTX *gorm.DB, holdingLock bool, stateUpserts ...*components.StateUpsert) (states []*pldapi.State, err error) { states = make([]*pldapi.State, len(stateUpserts)) stateLocks := make([]*pldapi.StateLock, 0, len(stateUpserts)) withValues := make([]*components.StateWithLabels, len(stateUpserts)) toMakeAvailable := make([]*components.StateWithLabels, 0, len(stateUpserts)) for i, ns := range stateUpserts { - schema, err := dc.ss.GetSchema(dc, dbTX, dc.domainName, ns.SchemaID, true) + schema, err := dc.ss.GetSchema(dc, dbTX, dc.domainName, ns.Schema, true) if err != nil { return nil, err } @@ -316,7 +320,7 @@ func (dc *domainContext) UpsertStates(dbTX *gorm.DB, stateUpserts ...*components createLock := &pldapi.StateLock{ Type: pldapi.StateLockTypeCreate.Enum(), Transaction: *ns.CreatedBy, - State: withValues[i].State.ID, + StateID: withValues[i].State.ID, } stateLocks = append(stateLocks, createLock) toMakeAvailable = append(toMakeAvailable, vs) @@ -327,8 +331,10 @@ func (dc *domainContext) UpsertStates(dbTX *gorm.DB, stateUpserts ...*components } // Take lock and check flush state - dc.stateLock.Lock() - defer dc.stateLock.Unlock() + if !holdingLock { + dc.stateLock.Lock() + defer dc.stateLock.Unlock() + } if flushErr := dc.checkResetInitUnFlushed(); flushErr != nil { return nil, flushErr } @@ -386,18 +392,18 @@ func (dc *domainContext) addStateLocks(locks ...*pldapi.StateLock) error { if l.Transaction == (uuid.UUID{}) { return i18n.NewError(dc, msgs.MsgStateLockNoTransaction) - } else if len(l.State) == 0 { + } else if len(l.StateID) == 0 { return i18n.NewError(dc, msgs.MsgStateLockNoState) } // For creating the state must be in our map (via Upsert) or we will fail to return it - creatingState := dc.creatingStates[l.State.String()] + creatingState := dc.creatingStates[l.StateID.String()] if lockType == pldapi.StateLockTypeCreate && creatingState == nil { - return i18n.NewError(dc, msgs.MsgStateLockCreateNotInContext, l.State) + return i18n.NewError(dc, msgs.MsgStateLockCreateNotInContext, l.StateID) } // Note we do NOT check for conflicts on existing state locks - log.L(dc).Debugf("state %s adding %s lock tx=%s)", l.State, lockType, l.Transaction) + log.L(dc).Debugf("state %s adding %s lock tx=%s)", l.StateID, lockType, l.Transaction) dc.txLocks = append(dc.txLocks, l) } return nil @@ -407,7 +413,7 @@ func (dc *domainContext) applyLocks(states []*pldapi.State) []*pldapi.State { for _, s := range states { s.Locks = []*pldapi.StateLock{} for _, l := range dc.txLocks { - if l.State.Equals(s.ID) { + if l.StateID.Equals(s.ID) { s.Locks = append(s.Locks, l) } } @@ -442,7 +448,7 @@ func (dc *domainContext) ResetTransactions(transactions ...uuid.UUID) { if lock.Transaction == tx { if lock.Type.V() == pldapi.StateLockTypeCreate { // Clean up the creating record - delete(dc.creatingStates, lock.State.String()) + delete(dc.creatingStates, lock.StateID.String()) } skip = true break @@ -578,15 +584,20 @@ func (dc *domainContext) checkResetInitUnFlushed() error { return nil } +type exportSnapshot struct { + States []*components.StateUpsert `json:"states"` + Locks []*exportableStateLock `json:"locks"` +} + // pldapi.StateLocks do not include the stateID in the serialized JSON so we need to define a new struct to include it type exportableStateLock struct { - State tktypes.HexBytes `json:"stateID"` + State tktypes.HexBytes `json:"stateId"` Transaction uuid.UUID `json:"transaction"` Type tktypes.Enum[pldapi.StateLockType] `json:"type"` } // Return a snapshot of all 
currently known state locks as serialized JSON -func (dc *domainContext) ExportStateLocks() ([]byte, error) { +func (dc *domainContext) ExportSnapshot() ([]byte, error) { dc.stateLock.Lock() defer dc.stateLock.Unlock() if flushErr := dc.checkResetInitUnFlushed(); flushErr != nil { @@ -595,32 +606,46 @@ func (dc *domainContext) ExportStateLocks() ([]byte, error) { locks := make([]*exportableStateLock, 0, len(dc.txLocks)) for _, l := range dc.txLocks { locks = append(locks, &exportableStateLock{ - State: l.State, + State: l.StateID, Transaction: l.Transaction, Type: l.Type, }) } - return json.Marshal(locks) + states := make([]*components.StateUpsert, 0, len(dc.creatingStates)) + for _, s := range dc.creatingStates { + states = append(states, &components.StateUpsert{ + ID: s.ID, + Schema: s.Schema, + Data: s.Data, + }) + } + return json.Marshal(&exportSnapshot{ + States: states, + Locks: locks, + }) } -// ImportStateLocks is used to restore the state of the domain context, by adding a set of locks -func (dc *domainContext) ImportStateLocks(stateLocksJSON []byte) error { +// ImportSnapshot is used to restore the state of the domain context, by adding a set of locks +func (dc *domainContext) ImportSnapshot(stateLocksJSON []byte) error { dc.stateLock.Lock() defer dc.stateLock.Unlock() if flushErr := dc.checkResetInitUnFlushed(); flushErr != nil { return flushErr } - locks := make([]*exportableStateLock, 0) - err := json.Unmarshal(stateLocksJSON, &locks) + var snapshot exportSnapshot + err := json.Unmarshal(stateLocksJSON, &snapshot) if err != nil { return i18n.WrapError(dc, err, msgs.MsgDomainContextImportInvalidJSON) } dc.creatingStates = make(map[string]*components.StateWithLabels) - dc.txLocks = make([]*pldapi.StateLock, 0, len(locks)) - for _, l := range locks { + dc.txLocks = make([]*pldapi.StateLock, 0, len(snapshot.Locks)) + if _, err = dc.upsertStates(dc.ss.p.DB(), true /* already hold lock */, snapshot.States...); err != nil { + return i18n.WrapError(dc, err, msgs.MsgDomainContextImportBadStates) + } + for _, l := range snapshot.Locks { dc.txLocks = append(dc.txLocks, &pldapi.StateLock{ DomainName: dc.domainName, - State: l.State, + StateID: l.State, Transaction: l.Transaction, Type: l.Type, }) @@ -641,7 +666,7 @@ func (dc *domainContext) ImportStateLocks(stateLocksJSON []byte) error { // 2. 
the state distribution message is never going to arrive because we are not on the distribution list // We can't tell the difference between these two cases so can't really fail here // It is up to the domain to ensure that they ask for the transaction to be `Park`ed temporarily if they suspect `1` - log.L(dc).Infof("ImportStateLocks: state %s not found in unflushed states", l.State) + log.L(dc).Infof("ImportSnapshot: state %s not found in unflushed states", l.State) } } } diff --git a/core/go/internal/statemgr/domain_context_test.go b/core/go/internal/statemgr/domain_context_test.go index 4bc206b76..89e12c38d 100644 --- a/core/go/internal/statemgr/domain_context_test.go +++ b/core/go/internal/statemgr/domain_context_test.go @@ -162,16 +162,16 @@ func TestUpsertSchemaAndStates(t *testing.T) { defer dc.Close() upsert1 := &components.StateUpsert{ - ID: fakeHash1, - SchemaID: schemaID, - Data: tktypes.RawJSON(fmt.Sprintf(`{"amount": 100, "owner": "0x1eDfD974fE6828dE81a1a762df680111870B7cDD", "salt": "%s"}`, tktypes.RandHex(32))), + ID: fakeHash1, + Schema: schemaID, + Data: tktypes.RawJSON(fmt.Sprintf(`{"amount": 100, "owner": "0x1eDfD974fE6828dE81a1a762df680111870B7cDD", "salt": "%s"}`, tktypes.RandHex(32))), } states, err := dc.UpsertStates(ss.p.DB(), upsert1, &components.StateUpsert{ - ID: fakeHash2, - SchemaID: schemaID, - Data: tktypes.RawJSON(fmt.Sprintf(`{"amount": 100, "owner": "0x1eDfD974fE6828dE81a1a762df680111870B7cDD", "salt": "%s"}`, tktypes.RandHex(32))), + ID: fakeHash2, + Schema: schemaID, + Data: tktypes.RawJSON(fmt.Sprintf(`{"amount": 100, "owner": "0x1eDfD974fE6828dE81a1a762df680111870B7cDD", "salt": "%s"}`, tktypes.RandHex(32))), }, ) require.NoError(t, err) @@ -203,7 +203,7 @@ func TestStateLockErrorsTransaction(t *testing.T) { zeroTxn := uuid.UUID{} _, err = dc.UpsertStates(ss.p.DB(), &components.StateUpsert{ - SchemaID: schemas[0].ID(), + Schema: schemas[0].ID(), Data: tktypes.RawJSON(fmt.Sprintf(`{"amount": 100, "owner": "0x1eDfD974fE6828dE81a1a762df680111870B7cDD", "salt": "%s"}`, tktypes.RandHex(32))), CreatedBy: &zeroTxn, }, @@ -229,7 +229,7 @@ func TestStateLockErrorsTransaction(t *testing.T) { err = dc.AddStateLocks(&pldapi.StateLock{ Type: pldapi.StateLockTypeCreate.Enum(), - State: tktypes.RandBytes(32), + StateID: tktypes.RandBytes(32), Transaction: txn1, }) require.Regexp(t, "PD010118", err) // create lock for state not in context @@ -259,10 +259,10 @@ func TestStateContextMintSpendMint(t *testing.T) { // Store some states tx1states, err := dc.UpsertStates(ss.p.DB(), - &components.StateUpsert{SchemaID: schemaID, Data: tktypes.RawJSON(fmt.Sprintf(`{"amount": 100, "owner": "0xf7b1c69F5690993F2C8ecE56cc89D42b1e737180", "salt": "%s"}`, tktypes.RandHex(32))), CreatedBy: &transactionID1}, - &components.StateUpsert{SchemaID: schemaID, Data: tktypes.RawJSON(fmt.Sprintf(`{"amount": 10, "owner": "0xf7b1c69F5690993F2C8ecE56cc89D42b1e737180", "salt": "%s"}`, tktypes.RandHex(32))), CreatedBy: &transactionID1}, - &components.StateUpsert{SchemaID: schemaID, Data: tktypes.RawJSON(fmt.Sprintf(`{"amount": 75, "owner": "0xf7b1c69F5690993F2C8ecE56cc89D42b1e737180", "salt": "%s"}`, tktypes.RandHex(32))), CreatedBy: &transactionID1}, - &components.StateUpsert{SchemaID: infoSchema, Data: tktypes.RawJSON(fmt.Sprintf(`{"info": "some info", "salt": "%s"}`, tktypes.RandHex(32)))}, + &components.StateUpsert{Schema: schemaID, Data: tktypes.RawJSON(fmt.Sprintf(`{"amount": 100, "owner": "0xf7b1c69F5690993F2C8ecE56cc89D42b1e737180", "salt": "%s"}`, tktypes.RandHex(32))), CreatedBy: 
&transactionID1}, + &components.StateUpsert{Schema: schemaID, Data: tktypes.RawJSON(fmt.Sprintf(`{"amount": 10, "owner": "0xf7b1c69F5690993F2C8ecE56cc89D42b1e737180", "salt": "%s"}`, tktypes.RandHex(32))), CreatedBy: &transactionID1}, + &components.StateUpsert{Schema: schemaID, Data: tktypes.RawJSON(fmt.Sprintf(`{"amount": 75, "owner": "0xf7b1c69F5690993F2C8ecE56cc89D42b1e737180", "salt": "%s"}`, tktypes.RandHex(32))), CreatedBy: &transactionID1}, + &components.StateUpsert{Schema: infoSchema, Data: tktypes.RawJSON(fmt.Sprintf(`{"info": "some info", "salt": "%s"}`, tktypes.RandHex(32)))}, ) require.NoError(t, err) assert.Len(t, tx1states, 4) @@ -270,7 +270,7 @@ func TestStateContextMintSpendMint(t *testing.T) { // Mark an in-memory read - doesn't affect its availability transactionID2 := uuid.New() err = dc.AddStateLocks( - &pldapi.StateLock{Type: pldapi.StateLockTypeRead.Enum(), State: tx1states[1].ID, Transaction: transactionID2}, + &pldapi.StateLock{Type: pldapi.StateLockTypeRead.Enum(), StateID: tx1states[1].ID, Transaction: transactionID2}, ) require.NoError(t, err) @@ -279,12 +279,12 @@ func TestStateContextMintSpendMint(t *testing.T) { lockView := dc.StateLocksByTransaction() assert.Equal(t, map[uuid.UUID][]pldapi.StateLock{ transactionID1: { - {Type: pldapi.StateLockTypeCreate.Enum(), State: tx1states[0].ID, Transaction: transactionID1}, - {Type: pldapi.StateLockTypeCreate.Enum(), State: tx1states[1].ID, Transaction: transactionID1}, - {Type: pldapi.StateLockTypeCreate.Enum(), State: tx1states[2].ID, Transaction: transactionID1}, + {Type: pldapi.StateLockTypeCreate.Enum(), StateID: tx1states[0].ID, Transaction: transactionID1}, + {Type: pldapi.StateLockTypeCreate.Enum(), StateID: tx1states[1].ID, Transaction: transactionID1}, + {Type: pldapi.StateLockTypeCreate.Enum(), StateID: tx1states[2].ID, Transaction: transactionID1}, }, transactionID2: { - {Type: pldapi.StateLockTypeRead.Enum(), State: tx1states[1].ID, Transaction: transactionID2}, + {Type: pldapi.StateLockTypeRead.Enum(), StateID: tx1states[1].ID, Transaction: transactionID2}, }, }, lockView) @@ -307,15 +307,15 @@ func TestStateContextMintSpendMint(t *testing.T) { // Simulate a transaction where we spend two states, and create 2 new ones transactionID3 := uuid.New() err = dc.AddStateLocks( - &pldapi.StateLock{Type: pldapi.StateLockTypeSpend.Enum(), State: tx1states[1].ID, Transaction: transactionID3}, // 10 + - &pldapi.StateLock{Type: pldapi.StateLockTypeSpend.Enum(), State: tx1states[2].ID, Transaction: transactionID3}, // 75 + + &pldapi.StateLock{Type: pldapi.StateLockTypeSpend.Enum(), StateID: tx1states[1].ID, Transaction: transactionID3}, // 10 + + &pldapi.StateLock{Type: pldapi.StateLockTypeSpend.Enum(), StateID: tx1states[2].ID, Transaction: transactionID3}, // 75 + ) require.NoError(t, err) // Do a quick check on upsert semantics with un-flushed updates, to make sure the unflushed list doesn't dup tx3states, err := dc.UpsertStates(ss.p.DB(), - &components.StateUpsert{SchemaID: schemaID, Data: tktypes.RawJSON(fmt.Sprintf(`{"amount": 35, "owner": "0xf7b1c69F5690993F2C8ecE56cc89D42b1e737180",
"salt": "%s"}`, tktypes.RandHex(32))), CreatedBy: &transactionID3}, + &components.StateUpsert{Schema: schemaID, Data: tktypes.RawJSON(fmt.Sprintf(`{"amount": 50, "owner": "0x615dD09124271D8008225054d85Ffe720E7a447A", "salt": "%s"}`, tktypes.RandHex(32))), CreatedBy: &transactionID3}, ) require.NoError(t, err) assert.Len(t, tx3states, 2) @@ -351,13 +351,13 @@ func TestStateContextMintSpendMint(t *testing.T) { // Write another transaction that splits a coin to two transactionID4 := uuid.New() err = dc.AddStateLocks( - &pldapi.StateLock{Type: pldapi.StateLockTypeSpend.Enum(), State: tx3states[1].ID, Transaction: transactionID4}, // 50 - &pldapi.StateLock{Type: pldapi.StateLockTypeRead.Enum(), State: tx1states[0].ID, Transaction: transactionID4}, // 100 + &pldapi.StateLock{Type: pldapi.StateLockTypeSpend.Enum(), StateID: tx3states[1].ID, Transaction: transactionID4}, // 50 + &pldapi.StateLock{Type: pldapi.StateLockTypeRead.Enum(), StateID: tx1states[0].ID, Transaction: transactionID4}, // 100 ) require.NoError(t, err) tx4states, err := dc.UpsertStates(ss.p.DB(), - &components.StateUpsert{SchemaID: schemaID, Data: tktypes.RawJSON(fmt.Sprintf(`{"amount": 20, "owner": "0x615dD09124271D8008225054d85Ffe720E7a447A", "salt": "%s"}`, tktypes.RandHex(32))), CreatedBy: &transactionID4}, - &components.StateUpsert{SchemaID: schemaID, Data: tktypes.RawJSON(fmt.Sprintf(`{"amount": 30, "owner": "0x615dD09124271D8008225054d85Ffe720E7a447A", "salt": "%s"}`, tktypes.RandHex(32))), CreatedBy: &transactionID4}, + &components.StateUpsert{Schema: schemaID, Data: tktypes.RawJSON(fmt.Sprintf(`{"amount": 20, "owner": "0x615dD09124271D8008225054d85Ffe720E7a447A", "salt": "%s"}`, tktypes.RandHex(32))), CreatedBy: &transactionID4}, + &components.StateUpsert{Schema: schemaID, Data: tktypes.RawJSON(fmt.Sprintf(`{"amount": 30, "owner": "0x615dD09124271D8008225054d85Ffe720E7a447A", "salt": "%s"}`, tktypes.RandHex(32))), CreatedBy: &transactionID4}, ) require.NoError(t, err) assert.Len(t, tx4states, 2) @@ -386,11 +386,11 @@ func TestStateContextMintSpendMint(t *testing.T) { for _, lock := range dc.txLocks { switch lock.Type.V() { case pldapi.StateLockTypeSpend: - spends = append(spends, &pldapi.StateSpendRecord{DomainName: "domain1", State: lock.State, Transaction: lock.Transaction}) + spends = append(spends, &pldapi.StateSpendRecord{DomainName: "domain1", State: lock.StateID, Transaction: lock.Transaction}) case pldapi.StateLockTypeRead: - reads = append(reads, &pldapi.StateReadRecord{DomainName: "domain1", State: lock.State, Transaction: lock.Transaction}) + reads = append(reads, &pldapi.StateReadRecord{DomainName: "domain1", State: lock.StateID, Transaction: lock.Transaction}) case pldapi.StateLockTypeCreate: - confirms = append(confirms, &pldapi.StateConfirmRecord{DomainName: "domain1", State: lock.State, Transaction: lock.Transaction}) + confirms = append(confirms, &pldapi.StateConfirmRecord{DomainName: "domain1", State: lock.StateID, Transaction: lock.Transaction}) } } // We add one extra spend that simulates something happening outside of this context @@ -514,8 +514,8 @@ func TestStateContextMintSpendWithNullifier(t *testing.T) { // Start with 2 states tx1states, err := dc.UpsertStates(ss.p.DB(), - &components.StateUpsert{ID: stateID1, SchemaID: schemaID, Data: data1, CreatedBy: &transactionID1}, - &components.StateUpsert{ID: stateID2, SchemaID: schemaID, Data: data2, CreatedBy: &transactionID1}, + &components.StateUpsert{ID: stateID1, Schema: schemaID, Data: data1, CreatedBy: &transactionID1}, + 
&components.StateUpsert{ID: stateID2, Schema: schemaID, Data: data2, CreatedBy: &transactionID1}, ) require.NoError(t, err) assert.Len(t, tx1states, 2) @@ -571,7 +571,7 @@ func TestStateContextMintSpendWithNullifier(t *testing.T) { // Mark the first state as "spending" transactionID2 := uuid.New() err = dc.AddStateLocks( - &pldapi.StateLock{Type: pldapi.StateLockTypeSpend.Enum(), State: stateID1, Transaction: transactionID2}, + &pldapi.StateLock{Type: pldapi.StateLockTypeSpend.Enum(), StateID: stateID1, Transaction: transactionID2}, ) assert.NoError(t, err) @@ -610,7 +610,7 @@ func TestStateContextMintSpendWithNullifier(t *testing.T) { // - the creation of the nullifier in the DB might fail due to the state not existing err = dc.UpsertNullifiers(&components.NullifierUpsert{State: stateID2, ID: nullifier2}) assert.Regexp(t, "PD010126", err) - _, err = dc.UpsertStates(ss.p.DB(), &components.StateUpsert{ID: stateID2, SchemaID: schemaID, Data: data2, CreatedBy: &transactionID1}) + _, err = dc.UpsertStates(ss.p.DB(), &components.StateUpsert{ID: stateID2, Schema: schemaID, Data: data2, CreatedBy: &transactionID1}) require.NoError(t, err) err = dc.UpsertNullifiers(&components.NullifierUpsert{State: stateID2, ID: nullifier2}) require.NoError(t, err) @@ -676,10 +676,10 @@ func TestDomainContextFlushErrorCapture(t *testing.T) { err = dc.AddStateLocks(&pldapi.StateLock{}) assert.Regexp(t, "PD010119.*pop", err) // needs reset - _, err = dc.ExportStateLocks() + _, err = dc.ExportSnapshot() assert.Regexp(t, "PD010119.*pop", err) // needs reset - err = dc.ImportStateLocks([]byte("{}")) + err = dc.ImportSnapshot([]byte("{}")) assert.Regexp(t, "PD010119.*pop", err) // needs reset err = dc.AddStateLocks(&pldapi.StateLock{}) @@ -739,7 +739,7 @@ func TestDCMergeUnFlushedWhileFlushing(t *testing.T) { tktypes.RandHex(32))), nil, dc.customHashFunction) require.NoError(t, err) tx1 := uuid.New() - _, err = dc.UpsertStates(ss.p.DB(), &components.StateUpsert{ID: s1.ID, SchemaID: schema.ID(), Data: s1.Data, CreatedBy: &tx1}) + _, err = dc.UpsertStates(ss.p.DB(), &components.StateUpsert{ID: s1.ID, Schema: schema.ID(), Data: s1.Data, CreatedBy: &tx1}) require.NoError(t, err) // Fake a flush transition @@ -855,7 +855,7 @@ func TestDCMergeUnFlushedWhileFlushingDedup(t *testing.T) { tktypes.RandHex(32))), nil, dc.customHashFunction) require.NoError(t, err) tx1 := uuid.New() - _, err = dc.UpsertStates(ss.p.DB(), &components.StateUpsert{ID: s1.ID, SchemaID: schema.ID(), Data: s1.Data, CreatedBy: &tx1}) + _, err = dc.UpsertStates(ss.p.DB(), &components.StateUpsert{ID: s1.ID, Schema: schema.ID(), Data: s1.Data, CreatedBy: &tx1}) require.NoError(t, err) // We add a second state, that will be excluded from the query due to a spending lock @@ -863,9 +863,9 @@ func TestDCMergeUnFlushedWhileFlushingDedup(t *testing.T) { `{"amount": 20, "owner": "0x615dD09124271D8008225054d85Ffe720E7a447A", "salt": "%s"}`, tktypes.RandHex(32))), nil, dc.customHashFunction) require.NoError(t, err) - _, err = dc.UpsertStates(ss.p.DB(), &components.StateUpsert{ID: s2.ID, SchemaID: schema.ID(), Data: s2.Data, CreatedBy: &tx1}) + _, err = dc.UpsertStates(ss.p.DB(), &components.StateUpsert{ID: s2.ID, Schema: schema.ID(), Data: s2.Data, CreatedBy: &tx1}) require.NoError(t, err) - err = dc.AddStateLocks(&pldapi.StateLock{Type: pldapi.StateLockTypeSpend.Enum(), State: s2.ID, Transaction: tx1}) + err = dc.AddStateLocks(&pldapi.StateLock{Type: pldapi.StateLockTypeSpend.Enum(), StateID: s2.ID, Transaction: tx1}) require.NoError(t, err) // Fake a flush 
transition @@ -908,7 +908,7 @@ func TestDCMergeUnFlushedEvalError(t *testing.T) { tktypes.RandHex(32))), nil, dc.customHashFunction) require.NoError(t, err) tx1 := uuid.New() - _, err = dc.UpsertStates(ss.p.DB(), &components.StateUpsert{ID: s1.ID, SchemaID: schema.ID(), Data: s1.Data, CreatedBy: &tx1}) + _, err = dc.UpsertStates(ss.p.DB(), &components.StateUpsert{ID: s1.ID, Schema: schema.ID(), Data: s1.Data, CreatedBy: &tx1}) require.NoError(t, err) _, err = dc.mergeUnFlushedApplyLocks(schema, []*pldapi.State{}, @@ -995,7 +995,7 @@ func TestDCFindBadQueryAndInsertBadValue(t *testing.T) { assert.Regexp(t, "PD010700", err) _, err = dc.UpsertStates(ss.p.DB(), &components.StateUpsert{ - SchemaID: schemaID, Data: tktypes.RawJSON(`"wrong"`), + Schema: schemaID, Data: tktypes.RawJSON(`"wrong"`), }) assert.Regexp(t, "FF22038", err) @@ -1012,8 +1012,8 @@ func TestDCUpsertStatesFailSchemaLookup(t *testing.T) { defer dc.Close() _, err := dc.UpsertStates(ss.p.DB(), &components.StateUpsert{ - ID: tktypes.RandBytes(32), - SchemaID: tktypes.Bytes32(tktypes.RandBytes(32)), + ID: tktypes.RandBytes(32), + Schema: tktypes.Bytes32(tktypes.RandBytes(32)), }) assert.Regexp(t, "pop", err) @@ -1030,19 +1030,19 @@ func TestDCResetWithMixedTxns(t *testing.T) { state1 := tktypes.HexBytes("state1") transactionID1 := uuid.New() err := dc.AddStateLocks( - &pldapi.StateLock{State: state1, Type: pldapi.StateLockTypeRead.Enum(), Transaction: transactionID1}) + &pldapi.StateLock{StateID: state1, Type: pldapi.StateLockTypeRead.Enum(), Transaction: transactionID1}) require.NoError(t, err) state2 := tktypes.HexBytes("state2") transactionID2 := uuid.New() err = dc.AddStateLocks( - &pldapi.StateLock{State: state2, Type: pldapi.StateLockTypeSpend.Enum(), Transaction: transactionID2}) + &pldapi.StateLock{StateID: state2, Type: pldapi.StateLockTypeSpend.Enum(), Transaction: transactionID2}) require.NoError(t, err) dc.ResetTransactions(transactionID1) assert.Len(t, dc.txLocks, 1) - assert.Equal(t, dc.txLocks[0].State, state2) + assert.Equal(t, dc.txLocks[0].StateID, state2) } @@ -1086,7 +1086,7 @@ func TestCheckEvalGTTimestamp(t *testing.T) { } -func TestExportStateLocks(t *testing.T) { +func TestExportSnapshot(t *testing.T) { ctx, ss, _, _, done := newDBMockStateManager(t) defer done() @@ -1120,13 +1120,13 @@ func TestExportStateLocks(t *testing.T) { ss.p.DB(), &components.StateUpsert{ ID: s1.ID, - SchemaID: schema1.ID(), + Schema: schema1.ID(), Data: s1.Data, CreatedBy: &transactionID1, }, &components.StateUpsert{ ID: s2.ID, - SchemaID: schema2.ID(), + Schema: schema2.ID(), Data: s2.Data, CreatedBy: &transactionID2, }, @@ -1136,37 +1136,49 @@ func TestExportStateLocks(t *testing.T) { err = dc.AddStateLocks( &pldapi.StateLock{ Type: pldapi.StateLockTypeSpend.Enum(), - State: s2.ID, + StateID: s2.ID, Transaction: transactionID3, }, ) assert.NoError(t, err) - json, err := dc.ExportStateLocks() + json, err := dc.ExportSnapshot() require.NoError(t, err) - assert.JSONEq(t, ` - [ + assert.JSONEq(t, `{ + "locks": [ { - "stateID":"`+s1.ID.String()+`", + "stateId":"`+s1.ID.String()+`", "transaction":"`+transactionID1.String()+`", "type":"create" }, { - "stateID":"`+s2.ID.String()+`", + "stateId":"`+s2.ID.String()+`", "transaction":"`+transactionID2.String()+`", "type":"create" }, { - "stateID":"`+s2.ID.String()+`", + "stateId":"`+s2.ID.String()+`", "transaction":"`+transactionID3.String()+`", "type":"spend" } - ]`, - string(json), + ], + "states": [ + { + "id": "`+s1.ID.String()+`", + "schema": "`+s1.Schema.String()+`", + "data": 
`+s1.Data.String()+` + }, + { + "id": "`+s2.ID.String()+`", + "schema": "`+s2.Schema.String()+`", + "data": `+s2.Data.String()+` + } + ] + }`, string(json), ) } -func TestImportStateLocks(t *testing.T) { +func TestImportSnapshot(t *testing.T) { ctx, ss, _, done := newDBTestStateManager(t) defer done() @@ -1204,28 +1216,29 @@ func TestImportStateLocks(t *testing.T) { transactionID2 := uuid.New() transactionID3 := uuid.New() - _, err = dc.UpsertStates(ss.p.DB(), - &components.StateUpsert{ - ID: s1.ID, - SchemaID: schema1.ID(), - Data: s1.Data, + stateUpserts := []*components.StateUpsert{ + { + ID: s1.ID, + Schema: schema1.ID(), + Data: s1.Data, }, - &components.StateUpsert{ - ID: s2.ID, - SchemaID: schema1.ID(), - Data: s2.Data, + { + ID: s2.ID, + Schema: schema1.ID(), + Data: s2.Data, }, - &components.StateUpsert{ - ID: s4.ID, - SchemaID: schema1.ID(), - Data: s4.Data, + { + ID: s4.ID, + Schema: schema1.ID(), + Data: s4.Data, }, - &components.StateUpsert{ - ID: s5.ID, - SchemaID: schema1.ID(), - Data: s5.Data, + { + ID: s5.ID, + Schema: schema1.ID(), + Data: s5.Data, }, - ) + } + _, err = dc.UpsertStates(ss.p.DB(), stateUpserts...) require.NoError(t, err) //imported locks include @@ -1235,8 +1248,8 @@ func TestImportStateLocks(t *testing.T) { // - state4 created by transaction3 for which we do have the data // and does not include state5 even though we do have the data for that // so after all that, the only available states should be state1 and state 4 - jsonToImport := fmt.Sprintf(` - [ + jsonToImport := fmt.Sprintf(`{ + "locks": [ { "stateID":"%s", "transaction":"%s", @@ -1262,7 +1275,9 @@ func TestImportStateLocks(t *testing.T) { "transaction":"%s", "type":"spend" } - ]`, + ], + "states": `+tktypes.JSONString(stateUpserts).Pretty()+` + }`, s1.ID.String(), transactionID1.String(), s2.ID.String(), transactionID2.String(), s3ID, transactionID3.String(), @@ -1270,7 +1285,7 @@ func TestImportStateLocks(t *testing.T) { s2.ID.String(), transactionID3.String(), ) - err = dc.ImportStateLocks([]byte(jsonToImport)) + err = dc.ImportSnapshot([]byte(jsonToImport)) require.NoError(t, err) _, states, err := dc.FindAvailableStates(ss.p.DB(), schema1.ID(), query.NewQueryBuilder().Query()) require.NoError(t, err) @@ -1280,7 +1295,28 @@ func TestImportStateLocks(t *testing.T) { } -func TestImportStateLocksJSONError(t *testing.T) { +func TestImportSnapshotBadStates(t *testing.T) { + + ctx, ss, _, done := newDBTestStateManager(t) + defer done() + + _, dc := newTestDomainContext(t, ctx, ss, "domain1", false) + defer dc.Close() + + err := dc.ImportSnapshot([]byte(`{ + "states": [ + { + "id": "` + tktypes.RandHex(32) + `", + "schema": "` + tktypes.RandHex(32) + `", + "data": {} + } + ] + }`)) + require.Regexp(t, "PD010133.*PD010106" /* unknown state schema */, err) + +} + +func TestImportSnapshotJSONError(t *testing.T) { ctx, ss, _, _, done := newDBMockStateManager(t) defer done() @@ -1294,7 +1330,7 @@ func TestImportStateLocksJSONError(t *testing.T) { } ]` - err := dc.ImportStateLocks([]byte(jsonToImport)) + err := dc.ImportSnapshot([]byte(jsonToImport)) assert.Error(t, err) assert.Regexp(t, "PD010132", err) diff --git a/core/go/internal/statemgr/state_status_test.go b/core/go/internal/statemgr/state_status_test.go index 12539e1df..d1ebf37b5 100644 --- a/core/go/internal/statemgr/state_status_test.go +++ b/core/go/internal/statemgr/state_status_test.go @@ -65,7 +65,7 @@ func genWidget(t *testing.T, schemaID tktypes.Bytes32, txID *uuid.UUID, withoutS withSalt, err := json.Marshal(ij) require.NoError(t, err) 
return &components.StateUpsert{ - SchemaID: schemaID, + Schema: schemaID, Data: withSalt, CreatedBy: txID, } @@ -207,7 +207,7 @@ func TestStateLockingQuery(t *testing.T) { err = dc.AddStateLocks(&pldapi.StateLock{ Type: pldapi.StateLockTypeSpend.Enum(), Transaction: txID2, - State: widgets[5].ID, + StateID: widgets[5].ID, }) require.NoError(t, err) @@ -255,7 +255,7 @@ func TestStateLockingQuery(t *testing.T) { txID13 := uuid.New() _, err = dc.UpsertStates(ss.p.DB(), &components.StateUpsert{ ID: widgets[3].ID, - SchemaID: widgets[3].Schema, + Schema: widgets[3].Schema, Data: widgets[3].Data, CreatedBy: &txID13, }) diff --git a/core/go/internal/transportmgr/reliable_msg_handler.go b/core/go/internal/transportmgr/reliable_msg_handler.go index b1f0f9975..248b16418 100644 --- a/core/go/internal/transportmgr/reliable_msg_handler.go +++ b/core/go/internal/transportmgr/reliable_msg_handler.go @@ -136,7 +136,7 @@ func (tm *transportManager) handleReliableMsgBatch(ctx context.Context, dbTX *go for _, s := range states { _, err := tm.stateManager.WriteReceivedStates(ctx, dbTX, domain, []*components.StateUpsertOutsideContext{s.state}) if err != nil { - log.L(ctx).Errorf("insert state %s from message %s for domain %s failed - attempting each individually: %s", s.state.ID, s.ack.id, domain, batchErr) + log.L(ctx).Errorf("insert state %s from message %s for domain %s failed: %s", s.state.ID, s.ack.id, domain, err) s.ack.Error = err.Error() } } @@ -277,6 +277,7 @@ func parseStateDistribution(ctx context.Context, msgID uuid.UUID, data []byte) ( var contractAddr *tktypes.EthAddress err = json.Unmarshal(data, &sd) if err == nil { + parsed.Data = sd.StateData parsed.ID, err = tktypes.ParseHexBytes(ctx, sd.StateID) } if err == nil { diff --git a/toolkit/go/pkg/pldapi/states.go b/toolkit/go/pkg/pldapi/states.go index 7fecc727f..afb985061 100644 --- a/toolkit/go/pkg/pldapi/states.go +++ b/toolkit/go/pkg/pldapi/states.go @@ -233,7 +233,7 @@ func (tt StateLockType) Options() []string { // (and maybe later spending) a state that is yet to be confirmed. 
type StateLock struct { DomainName string `json:"-"` - State tktypes.HexBytes `json:"-"` + StateID tktypes.HexBytes `json:"-"` Transaction uuid.UUID `docstruct:"StateLock" json:"transaction"` Type tktypes.Enum[StateLockType] `docstruct:"StateLock" json:"type"` } From da143aaf9213b78e0edaac08f2d4917f9e49bcb0 Mon Sep 17 00:00:00 2001 From: Peter Broadhurst Date: Sun, 5 Jan 2025 21:53:01 -0500 Subject: [PATCH 31/41] Generate nullifiers when writing received states Signed-off-by: Peter Broadhurst --- core/go/internal/components/privatetxmgr.go | 1 + .../privatetxnmgr/nullifier_distribution.go | 6 +- core/go/internal/transportmgr/manager.go | 2 + core/go/internal/transportmgr/manager_test.go | 3 + .../transportmgr/reliable_msg_handler.go | 65 +++++-- .../transportmgr/reliable_msg_handler_test.go | 167 +++++++++++++++++- 6 files changed, 220 insertions(+), 24 deletions(-) diff --git a/core/go/internal/components/privatetxmgr.go b/core/go/internal/components/privatetxmgr.go index eab0d744f..2c07dd629 100644 --- a/core/go/internal/components/privatetxmgr.go +++ b/core/go/internal/components/privatetxmgr.go @@ -96,5 +96,6 @@ type PrivateTxManager interface { PrivateTransactionConfirmed(ctx context.Context, receipt *TxCompletion) BuildStateDistributions(ctx context.Context, tx *PrivateTransaction) (*StateDistributionSet, error) + BuildNullifier(ctx context.Context, kr KeyResolver, s *StateDistributionWithData) (*NullifierUpsert, error) BuildNullifiers(ctx context.Context, distributions []*StateDistributionWithData) (nullifiers []*NullifierUpsert, err error) } diff --git a/core/go/internal/privatetxnmgr/nullifier_distribution.go b/core/go/internal/privatetxnmgr/nullifier_distribution.go index 54997c578..2c254bed0 100644 --- a/core/go/internal/privatetxnmgr/nullifier_distribution.go +++ b/core/go/internal/privatetxnmgr/nullifier_distribution.go @@ -25,7 +25,7 @@ import ( "github.com/kaleido-io/paladin/toolkit/pkg/tktypes" ) -func (p *privateTxManager) buildNullifier(ctx context.Context, krc components.KeyResolutionContextLazyDB, s *components.StateDistributionWithData) (*components.NullifierUpsert, error) { +func (p *privateTxManager) BuildNullifier(ctx context.Context, kr components.KeyResolver, s *components.StateDistributionWithData) (*components.NullifierUpsert, error) { // We need to call the signing engine with the local identity to build the nullifier log.L(ctx).Infof("Generating nullifier for state %s on node %s (algorithm=%s,verifierType=%s,payloadType=%s)", s.StateID, p.nodeName, *s.NullifierAlgorithm, *s.NullifierVerifierType, *s.NullifierPayloadType) @@ -38,7 +38,7 @@ func (p *privateTxManager) buildNullifier(ctx context.Context, krc components.Ke // Call the signing engine to build the nullifier var nulliferBytes []byte - mapping, err := krc.KeyResolverLazyDB().ResolveKey(identifier, *s.NullifierAlgorithm, *s.NullifierVerifierType) + mapping, err := kr.ResolveKey(identifier, *s.NullifierAlgorithm, *s.NullifierVerifierType) if err == nil { nulliferBytes, err = p.components.KeyManager().Sign(ctx, mapping, *s.NullifierPayloadType, s.StateData.Bytes()) } @@ -78,7 +78,7 @@ func (p *privateTxManager) BuildNullifiers(ctx context.Context, stateDistributio continue } - nullifier, err := p.buildNullifier(ctx, krc, s) + nullifier, err := p.BuildNullifier(ctx, krc.KeyResolverLazyDB(), s) if err != nil { return err } diff --git a/core/go/internal/transportmgr/manager.go b/core/go/internal/transportmgr/manager.go index ebe23aad0..3a79bd81c 100644 --- a/core/go/internal/transportmgr/manager.go +++ 
b/core/go/internal/transportmgr/manager.go @@ -50,6 +50,7 @@ type transportManager struct { registryManager components.RegistryManager stateManager components.StateManager domainManager components.DomainManager + keyManager components.KeyManager txManager components.TXManager privateTxManager components.PrivateTxManager identityResolver components.IdentityResolver @@ -112,6 +113,7 @@ func (tm *transportManager) PostInit(c components.AllComponents) error { tm.registryManager = c.RegistryManager() tm.stateManager = c.StateManager() tm.domainManager = c.DomainManager() + tm.keyManager = c.KeyManager() tm.txManager = c.TxManager() tm.privateTxManager = c.PrivateTxManager() tm.identityResolver = c.IdentityResolver() diff --git a/core/go/internal/transportmgr/manager_test.go b/core/go/internal/transportmgr/manager_test.go index 7dccc30d3..a6706ae81 100644 --- a/core/go/internal/transportmgr/manager_test.go +++ b/core/go/internal/transportmgr/manager_test.go @@ -42,6 +42,7 @@ type mockComponents struct { registryManager *componentmocks.RegistryManager stateManager *componentmocks.StateManager domainManager *componentmocks.DomainManager + keyManager *componentmocks.KeyManager txManager *componentmocks.TXManager privateTxManager *componentmocks.PrivateTxManager identityResolver *componentmocks.IdentityResolver @@ -52,6 +53,7 @@ func newMockComponents(t *testing.T, realDB bool) *mockComponents { mc.registryManager = componentmocks.NewRegistryManager(t) mc.stateManager = componentmocks.NewStateManager(t) mc.domainManager = componentmocks.NewDomainManager(t) + mc.keyManager = componentmocks.NewKeyManager(t) mc.txManager = componentmocks.NewTXManager(t) mc.privateTxManager = componentmocks.NewPrivateTxManager(t) mc.identityResolver = componentmocks.NewIdentityResolver(t) @@ -70,6 +72,7 @@ func newMockComponents(t *testing.T, realDB bool) *mockComponents { mc.c.On("RegistryManager").Return(mc.registryManager).Maybe() mc.c.On("StateManager").Return(mc.stateManager).Maybe() mc.c.On("DomainManager").Return(mc.domainManager).Maybe() + mc.c.On("KeyManager").Return(mc.keyManager).Maybe() mc.c.On("TxManager").Return(mc.txManager).Maybe() mc.c.On("PrivateTxManager").Return(mc.privateTxManager).Maybe() mc.c.On("IdentityResolver").Return(mc.identityResolver).Maybe() diff --git a/core/go/internal/transportmgr/reliable_msg_handler.go b/core/go/internal/transportmgr/reliable_msg_handler.go index 248b16418..5d80e1b7a 100644 --- a/core/go/internal/transportmgr/reliable_msg_handler.go +++ b/core/go/internal/transportmgr/reliable_msg_handler.go @@ -65,8 +65,27 @@ func (tm *transportManager) handleReliableMsgBatch(ctx context.Context, dbTX *go var acksToWrite []*components.ReliableMessageAck var acksToSend []*ackInfo statesToAdd := make(map[string][]*stateAndAck) + nullifierUpserts := make(map[string][]*components.NullifierUpsert) var preparedTxnToAdd []*components.PreparedTransactionWithRefs var txReceiptsToFinalize []*components.ReceiptInput + var writePreparedTxPostCommit func() + var krc components.KeyResolutionContext + + cleanup := func(err error) { + if krc != nil { + krc.Close(err == nil) + } + if err == nil { + + // We've committed the database work ok - send the acks/nacks to the other side + for _, a := range acksToSend { + _ = tm.queueFireAndForget(ctx, a.node, buildAck(a.id, a.Error)) + } + if writePreparedTxPostCommit != nil { + writePreparedTxPostCommit() + } + } + } // The batch can contain different kinds of message that all need persistence activity for _, v := range values { @@ -74,6 +93,17 @@ func (tm 
*transportManager) handleReliableMsgBatch(ctx context.Context, dbTX *go switch v.msg.MessageType { case RMHMessageTypeStateDistribution: sd, stateToAdd, err := parseStateDistribution(ctx, v.msg.MessageID, v.msg.Payload) + if err == nil && sd.NullifierAlgorithm != nil && sd.NullifierVerifierType != nil && sd.NullifierPayloadType != nil { + // We need to build any nullifiers that are required, before we dispatch to persistence + if krc == nil { + krc = tm.keyManager.NewKeyResolutionContext(ctx) + } + var nullifier *components.NullifierUpsert + nullifier, err = tm.privateTxManager.BuildNullifier(ctx, krc.KeyResolver(dbTX), sd) + if err == nil { + nullifierUpserts[sd.Domain] = append(nullifierUpserts[sd.Domain], nullifier) + } + } if err != nil { acksToSend = append(acksToSend, &ackInfo{node: v.p.Name, id: v.msg.MessageID, Error: err.Error()}, // reject the message permanently @@ -156,7 +186,7 @@ func (tm *transportManager) handleReliableMsgBatch(ctx context.Context, dbTX *go var matchedMsgs []*components.ReliableMessage err := dbTX.WithContext(ctx).Select("id").Find(&matchedMsgs).Error if err != nil { - return nil, nil, err + return cleanup, nil, err } validatedAcks := make([]*components.ReliableMessageAck, 0, len(acksToWrite)) for _, a := range acksToWrite { @@ -170,7 +200,7 @@ func (tm *transportManager) handleReliableMsgBatch(ctx context.Context, dbTX *go if len(validatedAcks) > 0 { // Now we're actually ready to insert them if err := tm.writeAcks(ctx, dbTX, acksToWrite...); err != nil { - return nil, nil, err + return cleanup, nil, err } } } @@ -178,31 +208,34 @@ func (tm *transportManager) handleReliableMsgBatch(ctx context.Context, dbTX *go // Insert the transaction receipts if len(txReceiptsToFinalize) > 0 { if err := tm.txManager.FinalizeTransactions(ctx, dbTX, txReceiptsToFinalize); err != nil { - return nil, nil, err + return cleanup, nil, err } } // Insert the prepared transactions, capturing any post-commit - var writePreparedTxPostCommit func() if len(preparedTxnToAdd) > 0 { var err error if writePreparedTxPostCommit, err = tm.txManager.WritePreparedTransactions(ctx, dbTX, preparedTxnToAdd); err != nil { - return nil, nil, err + return cleanup, nil, err } } - // We use a post-commit handler to send back any acks to the other side that are required - return func(err error) { - if err == nil { - // We've committed the database work ok - send the acks/nacks to the other side - for _, a := range acksToSend { - _ = tm.queueFireAndForget(ctx, a.node, buildAck(a.id, a.Error)) - } - if writePreparedTxPostCommit != nil { - writePreparedTxPostCommit() - } + // Write any nullifiers we generated + for domain, nullifiers := range nullifierUpserts { + if err := tm.stateManager.WriteNullifiersForReceivedStates(ctx, dbTX, domain, nullifiers); err != nil { + return cleanup, nil, err + } + } + + // Ensure we close out the key resolution context if we started one + if krc != nil { + if err := krc.PreCommit(); err != nil { + return cleanup, nil, err } - }, make([]flushwriter.Result[*noResult], len(values)), nil + } + + // We use a post-commit handler to send back any acks to the other side that are required + return cleanup, make([]flushwriter.Result[*noResult], len(values)), nil } diff --git a/core/go/internal/transportmgr/reliable_msg_handler_test.go b/core/go/internal/transportmgr/reliable_msg_handler_test.go index 5b5dd0c9c..186a44efc 100644 --- a/core/go/internal/transportmgr/reliable_msg_handler_test.go +++ b/core/go/internal/transportmgr/reliable_msg_handler_test.go @@ -26,6 +26,7 @@ import ( 
"github.com/kaleido-io/paladin/config/pkg/confutil" "github.com/kaleido-io/paladin/config/pkg/pldconf" "github.com/kaleido-io/paladin/core/internal/components" + "github.com/kaleido-io/paladin/core/mocks/componentmocks" "github.com/kaleido-io/paladin/toolkit/pkg/pldapi" "github.com/kaleido-io/paladin/toolkit/pkg/prototk" "github.com/kaleido-io/paladin/toolkit/pkg/tktypes" @@ -61,12 +62,23 @@ func setupAckOrNackCheck(t *testing.T, tp *testPlugin, msgID uuid.UUID, expected } } -func TestReceiveMessageStateSendAckRealDB(t *testing.T) { +func TestReceiveMessageStateWithNullifierSendAckRealDB(t *testing.T) { ctx, _, tp, done := newTestTransport(t, true, mockGoodTransport, func(mc *mockComponents, conf *pldconf.TransportManagerConfig) { mc.stateManager.On("WriteReceivedStates", mock.Anything, mock.Anything, "domain1", mock.Anything). Return(nil, nil).Once() + nullifier := &components.NullifierUpsert{ID: tktypes.RandBytes(32)} + mc.stateManager.On("WriteNullifiersForReceivedStates", mock.Anything, mock.Anything, "domain1", []*components.NullifierUpsert{nullifier}). + Return(nil).Once() + mkrc := componentmocks.NewKeyResolutionContext(t) + mkr := componentmocks.NewKeyResolver(t) + mc.privateTxManager.On("BuildNullifier", mock.Anything, mkr, mock.Anything).Return(nullifier, nil) + mkrc.On("KeyResolver", mock.Anything).Return(mkr) + mkrc.On("PreCommit").Return(nil) + mkrc.On("Close", true).Return(nil) + mc.keyManager.On("NewKeyResolutionContext", mock.Anything). + Return(mkrc).Once() }, ) defer done() @@ -79,10 +91,13 @@ func TestReceiveMessageStateSendAckRealDB(t *testing.T) { MessageType: RMHMessageTypeStateDistribution, Payload: tktypes.JSONString(&components.StateDistributionWithData{ StateDistribution: components.StateDistribution{ - Domain: "domain1", - ContractAddress: tktypes.RandAddress().String(), - SchemaID: tktypes.RandHex(32), - StateID: tktypes.RandHex(32), + Domain: "domain1", + ContractAddress: tktypes.RandAddress().String(), + SchemaID: tktypes.RandHex(32), + StateID: tktypes.RandHex(32), + NullifierAlgorithm: confutil.P("algo1"), + NullifierVerifierType: confutil.P("vtype1"), + NullifierPayloadType: confutil.P("ptype1"), }, StateData: []byte(`{"some":"data"}`), }), @@ -151,6 +166,55 @@ func TestHandleStateDistroBadState(t *testing.T) { ackNackCheck() } +func TestHandleStateDistroBadNullifier(t *testing.T) { + ctx, tm, tp, done := newTestTransport(t, false, + mockGoodTransport, + mockEmptyReliableMsgs, + func(mc *mockComponents, conf *pldconf.TransportManagerConfig) { + mkrc := componentmocks.NewKeyResolutionContext(t) + mkr := componentmocks.NewKeyResolver(t) + mc.privateTxManager.On("BuildNullifier", mock.Anything, mkr, mock.Anything).Return(nil, fmt.Errorf("bad nullifier")) + mkrc.On("KeyResolver", mock.Anything).Return(mkr) + mkrc.On("PreCommit").Return(nil) + mkrc.On("Close", true).Return(nil) + mc.keyManager.On("NewKeyResolutionContext", mock.Anything). 
+ Return(mkrc).Once() + }, + ) + defer done() + + msg := testReceivedReliableMsg( + RMHMessageTypeStateDistribution, + &components.StateDistributionWithData{ + StateDistribution: components.StateDistribution{ + Domain: "domain1", + ContractAddress: tktypes.RandAddress().String(), + SchemaID: tktypes.RandHex(32), + StateID: tktypes.RandHex(32), + NullifierAlgorithm: confutil.P("algo1"), + NullifierVerifierType: confutil.P("vtype1"), + NullifierPayloadType: confutil.P("ptype1"), + }, + StateData: []byte(`{"some":"data"}`), + }) + + ackNackCheck := setupAckOrNackCheck(t, tp, msg.MessageID, "bad nullifier") + + p, err := tm.getPeer(ctx, "node2", false) + require.NoError(t, err) + + // Handle the batch - will fail to build the nullifier + postCommit, _, err := tm.handleReliableMsgBatch(ctx, tm.persistence.DB(), []*reliableMsgOp{ + {p: p, msg: msg}, + }) + require.NoError(t, err) + + // Run the postCommit and check we get the nack + postCommit(nil) + + ackNackCheck() +} + func TestHandleStateDistroBadMsg(t *testing.T) { ctx, tm, tp, done := newTestTransport(t, false, mockGoodTransport, @@ -328,6 +392,99 @@ func TestHandlePreparedTxFail(t *testing.T) { } +func TestHandleNullifierFail(t *testing.T) { + ctx, tm, _, done := newTestTransport(t, false, + func(mc *mockComponents, conf *pldconf.TransportManagerConfig) { + mc.stateManager.On("WriteReceivedStates", mock.Anything, mock.Anything, "domain1", mock.Anything). + Return(nil, nil).Once() + nullifier := &components.NullifierUpsert{ID: tktypes.RandBytes(32)} + mc.stateManager.On("WriteNullifiersForReceivedStates", mock.Anything, mock.Anything, "domain1", []*components.NullifierUpsert{nullifier}). 
+ Return(nil).Once() + mkrc := componentmocks.NewKeyResolutionContext(t) + mkr := componentmocks.NewKeyResolver(t) + mc.privateTxManager.On("BuildNullifier", mock.Anything, mkr, mock.Anything).Return(nullifier, nil) + mkrc.On("KeyResolver", mock.Anything).Return(mkr) + mkrc.On("PreCommit").Return(fmt.Errorf("pop")) + mkrc.On("Close", false).Return(nil) + mc.keyManager.On("NewKeyResolutionContext", mock.Anything). + Return(mkrc).Once() + }, + ) + defer done() + + msg := testReceivedReliableMsg( + RMHMessageTypeStateDistribution, + &components.StateDistributionWithData{ + StateDistribution: components.StateDistribution{ + Domain: "domain1", + ContractAddress: tktypes.RandAddress().String(), + SchemaID: tktypes.RandHex(32), + StateID: tktypes.RandHex(32), + NullifierAlgorithm: confutil.P("algo1"), + NullifierVerifierType: confutil.P("vtype1"), + NullifierPayloadType: confutil.P("ptype1"), + }, + StateData: []byte(`{"some":"data"}`), + }) + + p, err := tm.getPeer(ctx, "node2", false) + require.NoError(t, err) + + pc, _, err := tm.handleReliableMsgBatch(ctx, tm.persistence.DB(), []*reliableMsgOp{ + {p: p, msg: msg}, + }) + require.Regexp(t, "pop", err) + + pc(err) + +} + func TestHandleReceiptBadData(t *testing.T) { ctx, tm, tp, done := newTestTransport(t, false, mockGoodTransport, From a7c5aa9f5b51a70a821de474361bfc2592bd73b9 Mon Sep 17 00:00:00 2001 From: Peter Broadhurst Date: Sun, 5 Jan 2025 22:00:10 -0500 Subject: [PATCH 32/41] Fix SQLite migration Signed-off-by: Peter Broadhurst --- core/go/componenttest/utils_for_test.go | 2 ++ .../db/migrations/sqlite/000014_peer_queued_messages.down.sql | 2 -- core/go/pkg/testbed/testbed.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/core/go/componenttest/utils_for_test.go b/core/go/componenttest/utils_for_test.go index 294cdd1eb..b07412af2 100644 --- a/core/go/componenttest/utils_for_test.go +++ b/core/go/componenttest/utils_for_test.go @@ -381,6 +381,8 @@ func testConfig(t *testing.T) pldconf.PaladinConfig { conf.RPCServer.WS.Disabled = true conf.Log.Level = confutil.P("info") + conf.TransportManagerConfig.ReliableMessageWriter.BatchMaxSize = confutil.P(1) + conf.Wallets[0].Signer.KeyStore.Static.Keys["seed"] = pldconf.StaticKeyEntryConfig{ Encoding: "hex", Inline: tktypes.RandHex(32), diff --git a/core/go/db/migrations/sqlite/000014_peer_queued_messages.down.sql b/core/go/db/migrations/sqlite/000014_peer_queued_messages.down.sql index 39434162b..99eeb9867 100644 --- a/core/go/db/migrations/sqlite/000014_peer_queued_messages.down.sql +++ b/core/go/db/migrations/sqlite/000014_peer_queued_messages.down.sql @@ -1,5 +1,3 @@ -BEGIN; DROP TABLE reliable_msg_acks; DROP TABLE reliable_msgs; -COMMIT; diff --git a/core/go/pkg/testbed/testbed.go b/core/go/pkg/testbed/testbed.go index 479a1b9f2..6ddb4084f 100644 --- a/core/go/pkg/testbed/testbed.go +++ b/core/go/pkg/testbed/testbed.go @@ -160,7 +160,7 @@ func unitTestComponentManagerStart(ctx context.Context, conf *pldconf.PaladinCon return cm, err } -func (tb *testbed) HandlePaladinMsg(context.Context, *components.TransportMessage) { +func (tb *testbed) HandlePaladinMsg(context.Context, *components.ReceivedMessage) { // no-op } From 456023b06d8528e1b3b0dac19b05179e1679c40e Mon Sep 17 00:00:00 2001 From: Peter Broadhurst Date: Sun, 5 Jan 2025 22:03:11 -0500 Subject: [PATCH 33/41] Fix down migrations for later deletion of table Signed-off-by: Peter Broadhurst --- .../postgres/000012_create_prepared_tx_tables.down.sql | 8 ++++---- .../sqlite/000012_create_prepared_tx_tables.down.sql | 8 
++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/core/go/db/migrations/postgres/000012_create_prepared_tx_tables.down.sql b/core/go/db/migrations/postgres/000012_create_prepared_tx_tables.down.sql index 23a2858f0..7758ff7cc 100644 --- a/core/go/db/migrations/postgres/000012_create_prepared_tx_tables.down.sql +++ b/core/go/db/migrations/postgres/000012_create_prepared_tx_tables.down.sql @@ -1,6 +1,6 @@ BEGIN; -DROP TABLE prepared_txn_states; -DROP TABLE prepared_txn_distribution_acknowledgments; -DROP TABLE prepared_txn_distributions; -DROP TABLE prepared_txns; +DROP TABLE IF EXISTS prepared_txn_states; +DROP TABLE IF EXISTS prepared_txn_distribution_acknowledgments; +DROP TABLE IF EXISTS prepared_txn_distributions; +DROP TABLE IF EXISTS prepared_txns; COMMIT; \ No newline at end of file diff --git a/core/go/db/migrations/sqlite/000012_create_prepared_tx_tables.down.sql b/core/go/db/migrations/sqlite/000012_create_prepared_tx_tables.down.sql index 0ee252c5a..633cefd95 100644 --- a/core/go/db/migrations/sqlite/000012_create_prepared_tx_tables.down.sql +++ b/core/go/db/migrations/sqlite/000012_create_prepared_tx_tables.down.sql @@ -1,4 +1,4 @@ -DROP TABLE prepared_txn_states; -DROP TABLE prepared_txn_distribution_acknowledgments; -DROP TABLE prepared_txn_distributions; -DROP TABLE prepared_txns; +DROP TABLE IF EXISTS prepared_txn_states; +DROP TABLE IF EXISTS prepared_txn_distribution_acknowledgments; +DROP TABLE IF EXISTS prepared_txn_distributions; +DROP TABLE IF EXISTS prepared_txns; From 59ae8f97105501de488580073934987a59f40dec Mon Sep 17 00:00:00 2001 From: Peter Broadhurst Date: Sun, 5 Jan 2025 22:09:42 -0500 Subject: [PATCH 34/41] Up coverage Signed-off-by: Peter Broadhurst --- core/go/build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/go/build.gradle b/core/go/build.gradle index cd3c402c1..62c500295 100644 --- a/core/go/build.gradle +++ b/core/go/build.gradle @@ -31,7 +31,7 @@ ext { include "mocks/**/*.go" } - targetCoverage = 92.5 + targetCoverage = 93.5 maxCoverageBarGap = 1 coverageExcludedPackages = [ 'github.com/kaleido-io/paladin/core/internal/privatetxnmgr/ptmgrtypes/mock_transaction_flow.go', From 133e1fd4f1c2704155f242ebeca313340e99a75a Mon Sep 17 00:00:00 2001 From: Peter Broadhurst Date: Sun, 5 Jan 2025 22:19:37 -0500 Subject: [PATCH 35/41] Fix client for struct change Signed-off-by: Peter Broadhurst --- toolkit/go/pkg/pldclient/txbuilder_test.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/toolkit/go/pkg/pldclient/txbuilder_test.go b/toolkit/go/pkg/pldclient/txbuilder_test.go index c1c61d2a6..b28ae8714 100644 --- a/toolkit/go/pkg/pldclient/txbuilder_test.go +++ b/toolkit/go/pkg/pldclient/txbuilder_test.go @@ -312,10 +312,12 @@ func TestBuildAndPreparePrivateTXHTTPOk(t *testing.T) { "ptx_getPreparedTransaction", rpcserver.RPCMethod1(func(ctx context.Context, suppliedID uuid.UUID) (*pldapi.PreparedTransaction, error) { require.Equal(t, txID, suppliedID) return &pldapi.PreparedTransaction{ - ID: txID, - Transaction: pldapi.TransactionInput{ - TransactionBase: pldapi.TransactionBase{ - IdempotencyKey: "tx1", + PreparedTransactionBase: &pldapi.PreparedTransactionBase{ + ID: txID, + Transaction: pldapi.TransactionInput{ + TransactionBase: pldapi.TransactionBase{ + IdempotencyKey: "tx1", + }, }, }, }, nil From 2c9655b7abb442bf09667b42e33e868795763fb6 Mon Sep 17 00:00:00 2001 From: Peter Broadhurst Date: Mon, 6 Jan 2025 08:56:58 -0500 Subject: [PATCH 36/41] Add docs for peer JSON/RPC apis 
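This adds reference docs for the two peer-management JSON-RPC methods. As an illustrative sketch of usage (the node name "node2" and all stats values below are placeholders, not output from a real node):

  transport_peerInfo request for a single peer:

    {"jsonrpc": "2.0", "id": 1, "method": "transport_peerInfo", "params": ["node2"]}

  illustrative result, shaped per the PeerInfo/PeerStats reference docs:

    {"jsonrpc": "2.0", "id": 1, "result": {
      "name": "node2",
      "stats": {
        "sentMsgs": 0, "receivedMsgs": 0,
        "sentBytes": 0, "receivedBytes": 0,
        "lastSend": null, "lastReceive": null,
        "reliableHighestSent": 0, "reliableAckBase": 0
      },
      "activated": 0
    }}

transport_peers takes no parameters and returns an array of the same structure, one entry per currently activated peer.
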
Signed-off-by: Peter Broadhurst --- core/go/internal/transportmgr/peer.go | 12 ++--- doc-site/docs/reference/apis/transport.md | 16 ++++++ .../types/_includes/peerinfo_description.md | 0 doc-site/docs/reference/types/peerinfo.md | 49 +++++++++++++++++++ .../reference/types/preparedtransaction.md | 4 -- toolkit/go/pkg/pldapi/peerinfo.go | 11 +++-- toolkit/go/pkg/pldclient/transport.go | 8 +++ toolkit/go/pkg/reference/reference.go | 1 + toolkit/go/pkg/tkmsgs/en_descriptions.go | 19 +++++++ 9 files changed, 105 insertions(+), 15 deletions(-) create mode 100644 doc-site/docs/reference/types/_includes/peerinfo_description.md create mode 100644 doc-site/docs/reference/types/peerinfo.md diff --git a/core/go/internal/transportmgr/peer.go b/core/go/internal/transportmgr/peer.go index 6fae3fac5..c35fe1b0d 100644 --- a/core/go/internal/transportmgr/peer.go +++ b/core/go/internal/transportmgr/peer.go @@ -191,7 +191,7 @@ func (tm *transportManager) connectPeer(ctx context.Context, nodeName string, se tm.peers[nodeName] = p if sending { - p.OutboundError = p.startSender() + p.OutboundTransport, p.OutboundError = p.startSender() if p.OutboundError != nil { // Note the peer is still in our list, but not connected for send. // This means status can be reported for it. @@ -202,12 +202,12 @@ func (tm *transportManager) connectPeer(ctx context.Context, nodeName string, se return p, nil } -func (p *peer) startSender() error { +func (p *peer) startSender() (string, error) { // Note the registry is responsible for caching to make this call as efficient as if // we maintained the transport details in-memory ourselves. registeredTransportDetails, err := p.tm.registryManager.GetNodeTransports(p.ctx, p.Name) if err != nil { - return err + return "", err } // See if any of the transports registered by the node, are configured on this local node @@ -224,7 +224,7 @@ func (p *peer) startSender() error { for _, rtd := range registeredTransportDetails { registeredTransportNames = append(registeredTransportNames, rtd.Transport) } - return i18n.NewError(p.ctx, msgs.MsgTransportNoTransportsConfiguredForNode, p.Name, registeredTransportNames) + return "", i18n.NewError(p.ctx, msgs.MsgTransportNoTransportsConfiguredForNode, p.Name, registeredTransportNames) } // Activate the connection (the deactivate is deferred to the send loop) @@ -233,7 +233,7 @@ func (p *peer) startSender() error { TransportDetails: remoteTransportDetails, }) if err != nil { - return err + return p.transport.name, err } if err = json.Unmarshal([]byte(res.PeerInfoJson), &p.Outbound); err != nil { // We've already activated at this point, so we need to keep going - but this @@ -245,7 +245,7 @@ func (p *peer) startSender() error { log.L(p.ctx).Debugf("connected to peer '%s'", p.Name) p.senderStarted.Store(true) go p.sender() - return nil + return p.transport.name, nil } func (p *peer) notifyPersistedMsgAvailable() { diff --git a/doc-site/docs/reference/apis/transport.md b/doc-site/docs/reference/apis/transport.md index 09a7d24ba..a2bb2178b 100644 --- a/doc-site/docs/reference/apis/transport.md +++ b/doc-site/docs/reference/apis/transport.md @@ -23,3 +23,19 @@ title: transport_* 0. `nodeName`: `string` +## `transport_peerInfo` + +### Parameters + +0. `nodeName`: `string` + +### Returns + +0. `peer`: [`PeerInfo`](../types/peerinfo.md#peerinfo) + +## `transport_peers` + +### Returns + +0. 
`peers`: [`PeerInfo[]`](../types/peerinfo.md#peerinfo) + diff --git a/doc-site/docs/reference/types/_includes/peerinfo_description.md b/doc-site/docs/reference/types/_includes/peerinfo_description.md new file mode 100644 index 000000000..e69de29bb diff --git a/doc-site/docs/reference/types/peerinfo.md b/doc-site/docs/reference/types/peerinfo.md new file mode 100644 index 000000000..e1d8f476f --- /dev/null +++ b/doc-site/docs/reference/types/peerinfo.md @@ -0,0 +1,49 @@ +--- +title: PeerInfo +--- +{% include-markdown "./_includes/peerinfo_description.md" %} + +### Example + +```json +{ + "name": "", + "stats": { + "sentMsgs": 0, + "receivedMsgs": 0, + "sentBytes": 0, + "receivedBytes": 0, + "lastSend": null, + "lastReceive": null, + "reliableHighestSent": 0, + "reliableAckBase": 0 + }, + "activated": 0 +} +``` + +### Field Descriptions + +| Field Name | Description | Type | +|------------|-------------|------| +| `name` | The name of the peer node | `string` | +| `stats` | Statistics for the outbound and inbound data transfer | [`PeerStats`](#peerstats) | +| `activated` | The time when the peer was activated by an attempt to send data, or data arriving on a transport from this peer | [`Timestamp`](simpletypes.md#timestamp) | +| `outboundTransport` | The name of the transport selected for outbound connection to the peer. Omitted if no attempt to send data has occurred for this peer | `string` | +| `outbound` | Transport specific information about an established outbound connection to the peer. Omitted if the peer does not have an established outbound connection | `map[string]any` | +| `outboundError` | Contains an error if attempting to send data, and the transport connection failed | `error` | + +## PeerStats + +| Field Name | Description | Type | +|------------|-------------|------| +| `sentMsgs` | Count of messages sent since activation of this peer | `uint64` | +| `receivedMsgs` | Count of messages received since activation of this peer | `uint64` | +| `sentBytes` | Count of payload bytes sent since activation of this peer (does not include header data) | `uint64` | +| `receivedBytes` | Count of payload bytes received since activation of this peer (does not include header data) | `uint64` | +| `lastSend` | Timestamp of the last send to this peer | [`Timestamp`](simpletypes.md#timestamp) | +| `lastReceive` | Timestamp of the last receive from this peer | [`Timestamp`](simpletypes.md#timestamp) | +| `reliableHighestSent` | Outbound reliable messages are assigned a sequence. This is the highest sequence sent to the peer since activation | `uint64` | +| `reliableAckBase` | Outbound reliable messages are assigned a sequence. 
This is the lowest sequence that has not received an acknowledgement from the peer | `uint64` | + + diff --git a/doc-site/docs/reference/types/preparedtransaction.md b/doc-site/docs/reference/types/preparedtransaction.md index e4bdd89cb..a32121cb9 100644 --- a/doc-site/docs/reference/types/preparedtransaction.md +++ b/doc-site/docs/reference/types/preparedtransaction.md @@ -7,10 +7,6 @@ title: PreparedTransaction ```json { - "id": "00000000-0000-0000-0000-000000000000", - "domain": "", - "to": null, - "transaction": {}, "states": {} } ``` diff --git a/toolkit/go/pkg/pldapi/peerinfo.go b/toolkit/go/pkg/pldapi/peerinfo.go index 6b0e010f3..a7498e51c 100644 --- a/toolkit/go/pkg/pldapi/peerinfo.go +++ b/toolkit/go/pkg/pldapi/peerinfo.go @@ -19,11 +19,12 @@ package pldapi import "github.com/kaleido-io/paladin/toolkit/pkg/tktypes" type PeerInfo struct { - Name string `docstruct:"PeerInfo" json:"name"` - Stats PeerStats `docstruct:"PeerInfo" json:"stats"` - Activated tktypes.Timestamp `docstruct:"PeerInfo" json:"activated"` - Outbound map[string]any `docstruct:"PeerInfo" json:"outbound,omitempty"` - OutboundError error `docstruct:"PeerInfo" json:"outboundError,omitempty"` + Name string `docstruct:"PeerInfo" json:"name"` + Stats PeerStats `docstruct:"PeerInfo" json:"stats"` + Activated tktypes.Timestamp `docstruct:"PeerInfo" json:"activated"` + OutboundTransport string `docstruct:"PeerInfo" json:"outboundTransport,omitempty"` + Outbound map[string]any `docstruct:"PeerInfo" json:"outbound,omitempty"` + OutboundError error `docstruct:"PeerInfo" json:"outboundError,omitempty"` } type PeerStats struct { diff --git a/toolkit/go/pkg/pldclient/transport.go b/toolkit/go/pkg/pldclient/transport.go index 048c92437..8f8c30588 100644 --- a/toolkit/go/pkg/pldclient/transport.go +++ b/toolkit/go/pkg/pldclient/transport.go @@ -46,6 +46,14 @@ var transportInfo = &rpcModuleInfo{ Inputs: []string{"transportName"}, Output: "transportDetailsStr", }, + "transport_peers": { + Inputs: []string{}, + Output: "peers", + }, + "transport_peerInfo": { + Inputs: []string{"nodeName"}, + Output: "peer", + }, }, } diff --git a/toolkit/go/pkg/reference/reference.go b/toolkit/go/pkg/reference/reference.go index 615f9a5ce..d032cb245 100644 --- a/toolkit/go/pkg/reference/reference.go +++ b/toolkit/go/pkg/reference/reference.go @@ -113,6 +113,7 @@ var allTypes = []interface{}{ pldapi.IndexedEvent{}, pldapi.EventWithData{}, pldapi.ABIDecodedData{}, + pldapi.PeerInfo{}, tktypes.JSONFormatOptions(""), pldapi.StateStatusQualifier(""), query.QueryJSON{ diff --git a/toolkit/go/pkg/tkmsgs/en_descriptions.go b/toolkit/go/pkg/tkmsgs/en_descriptions.go index 88ae59b70..9d1a64932 100644 --- a/toolkit/go/pkg/tkmsgs/en_descriptions.go +++ b/toolkit/go/pkg/tkmsgs/en_descriptions.go @@ -243,3 +243,22 @@ var ( OnChainLocationLogIndex = ffm("OnChainLocation.logIndex", "The log index within the transaction of the event") ActiveFlagActive = ffm("ActiveFlag.active", "When querying with an activeFilter of 'any' or 'inactive', this boolean shows if the entry/property is active or not") ) + +// pldclient/transport.go +var ( + PeerInfoName = ffm("PeerInfo.name", "The name of the peer node") + PeerInfoStats = ffm("PeerInfo.stats", "Statistics for the outbound and inbound data transfer") + PeerInfoActivated = ffm("PeerInfo.activated", "The time when the peer was activated by an attempt to send data, or data arriving on a transport from this peer") + PeerInfoOutbound = ffm("PeerInfo.outbound", "Transport specific information about an established outbound 
connection to the peer. Omitted if the peer does not have an established outbound connection") + PeerInfoOutboundTransport = ffm("PeerInfo.outboundTransport", "The name of the transport selected for outbound connection to the peer. Omitted if no attempt to send data has occurred for this peer") + PeerInfoOutboundError = ffm("PeerInfo.outboundError", "Contains an error if attempting to send data, and the transport connection failed") + + PeerStatsSentMsgs = ffm("PeerStats.sentMsgs", "Count of messages sent since activation of this peer") + PeerStatsReceivedMsgs = ffm("PeerStats.receivedMsgs", "Count of messages received since activation of this peer") + PeerStatsSentBytes = ffm("PeerStats.sentBytes", "Count of payload bytes sent since activation of this peer (does not include header data)") + PeerStatsReceivedBytes = ffm("PeerStats.receivedBytes", "Count of payload bytes received since activation of this peer (does not include header data)") + PeerStatsLastSend = ffm("PeerStats.lastSend", "Timestamp of the last send to this peer") + PeerStatsLastReceive = ffm("PeerStats.lastReceive", "Timestamp of the last receive from this peer") + PeerStatsReliableHighestSent = ffm("PeerStats.reliableHighestSent", "Outbound reliable messages are assigned a sequence. This is the highest sequence sent to the peer since activation") + PeerStatsReliableAckBase = ffm("PeerStats.reliableAckBase", "Outbound reliable messages are assigned a sequence. This is the lowest sequence that has not received an acknowledgement from the peer") +) From 95d72ebd9c8f8d310679d6aed82b353c37b36cf6 Mon Sep 17 00:00:00 2001 From: Peter Broadhurst Date: Mon, 6 Jan 2025 09:10:33 -0500 Subject: [PATCH 37/41] Remove unused fields after activate/deactivate API update Signed-off-by: Peter Broadhurst --- .../grpc/internal/grpctransport/outbound_conn.go | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/transports/grpc/internal/grpctransport/outbound_conn.go b/transports/grpc/internal/grpctransport/outbound_conn.go index a4f153876..3fd5e0d65 100644 --- a/transports/grpc/internal/grpctransport/outbound_conn.go +++ b/transports/grpc/internal/grpctransport/outbound_conn.go @@ -28,14 +28,12 @@ import ( ) type outboundConn struct { - t *grpcTransport - nodeName string - client proto.PaladinGRPCTransportClient - peerInfo PeerInfo - sendLock sync.Mutex - waiting int - connError error - stream grpc.ClientStreamingClient[proto.Message, proto.Empty] + t *grpcTransport + nodeName string + client proto.PaladinGRPCTransportClient + peerInfo PeerInfo + sendLock sync.Mutex + stream grpc.ClientStreamingClient[proto.Message, proto.Empty] } func (t *grpcTransport) newConnection(ctx context.Context, nodeName string, transportDetailsJSON string) (oc *outboundConn, peerInfoJSON []byte, err error) { From b06c28e04c47192c228129c45ca76d6d7b693c6c Mon Sep 17 00:00:00 2001 From: Peter Broadhurst Date: Mon, 6 Jan 2025 12:27:25 -0500 Subject: [PATCH 38/41] Update naming from review Signed-off-by: Peter Broadhurst --- core/go/internal/plugins/transports.go | 16 ++++----- core/go/internal/plugins/transports_test.go | 12 +++---- .../transaction_flow_mutators.go | 10 +----- core/go/internal/transportmgr/peer.go | 4 +-- core/go/internal/transportmgr/peer_test.go | 12 +++---- .../internal/transportmgr/transport_test.go | 8 ++--- .../go/pkg/plugintk/plugin_type_transport.go | 28 +++++++-------- .../plugintk/plugin_type_transport_test.go | 24 ++++++------- toolkit/proto/protos/service.proto | 8 ++--- toolkit/proto/protos/to_transport.proto | 8 
++--- .../internal/grpctransport/grpc_transport.go | 8 ++--- .../grpctransport/grpc_transport_test.go | 4 +-- .../grpctransport/tls_verifier_test.go | 36 +++++++++---------- 13 files changed, 85 insertions(+), 93 deletions(-) diff --git a/core/go/internal/plugins/transports.go b/core/go/internal/plugins/transports.go index 08702c50f..2de1a8b73 100644 --- a/core/go/internal/plugins/transports.go +++ b/core/go/internal/plugins/transports.go @@ -130,14 +130,14 @@ func (br *TransportBridge) GetLocalDetails(ctx context.Context, req *prototk.Get return } -func (br *TransportBridge) ActivateNode(ctx context.Context, req *prototk.ActivateNodeRequest) (res *prototk.ActivateNodeResponse, err error) { +func (br *TransportBridge) ActivatePeer(ctx context.Context, req *prototk.ActivatePeerRequest) (res *prototk.ActivatePeerResponse, err error) { err = br.toPlugin.RequestReply(ctx, func(dm plugintk.PluginMessage[prototk.TransportMessage]) { - dm.Message().RequestToTransport = &prototk.TransportMessage_ActivateNode{ActivateNode: req} + dm.Message().RequestToTransport = &prototk.TransportMessage_ActivatePeer{ActivatePeer: req} }, func(dm plugintk.PluginMessage[prototk.TransportMessage]) bool { - if r, ok := dm.Message().ResponseFromTransport.(*prototk.TransportMessage_ActivateNodeRes); ok { - res = r.ActivateNodeRes + if r, ok := dm.Message().ResponseFromTransport.(*prototk.TransportMessage_ActivatePeerRes); ok { + res = r.ActivatePeerRes } return res != nil }, @@ -145,14 +145,14 @@ func (br *TransportBridge) ActivateNode(ctx context.Context, req *prototk.Activa return } -func (br *TransportBridge) DeactivateNode(ctx context.Context, req *prototk.DeactivateNodeRequest) (res *prototk.DeactivateNodeResponse, err error) { +func (br *TransportBridge) DeactivatePeer(ctx context.Context, req *prototk.DeactivatePeerRequest) (res *prototk.DeactivatePeerResponse, err error) { err = br.toPlugin.RequestReply(ctx, func(dm plugintk.PluginMessage[prototk.TransportMessage]) { - dm.Message().RequestToTransport = &prototk.TransportMessage_DeactivateNode{DeactivateNode: req} + dm.Message().RequestToTransport = &prototk.TransportMessage_DeactivatePeer{DeactivatePeer: req} }, func(dm plugintk.PluginMessage[prototk.TransportMessage]) bool { - if r, ok := dm.Message().ResponseFromTransport.(*prototk.TransportMessage_DeactivateNodeRes); ok { - res = r.DeactivateNodeRes + if r, ok := dm.Message().ResponseFromTransport.(*prototk.TransportMessage_DeactivatePeerRes); ok { + res = r.DeactivatePeerRes } return res != nil }, diff --git a/core/go/internal/plugins/transports_test.go b/core/go/internal/plugins/transports_test.go index 555d3560c..331c1ed2c 100644 --- a/core/go/internal/plugins/transports_test.go +++ b/core/go/internal/plugins/transports_test.go @@ -128,13 +128,13 @@ func TestTransportRequestsOK(t *testing.T) { GetLocalDetails: func(ctx context.Context, gldr *prototk.GetLocalDetailsRequest) (*prototk.GetLocalDetailsResponse, error) { return &prototk.GetLocalDetailsResponse{TransportDetails: "endpoint stuff"}, nil }, - ActivateNode: func(ctx context.Context, anr *prototk.ActivateNodeRequest) (*prototk.ActivateNodeResponse, error) { + ActivatePeer: func(ctx context.Context, anr *prototk.ActivatePeerRequest) (*prototk.ActivatePeerResponse, error) { assert.Equal(t, "node1", anr.NodeName) - return &prototk.ActivateNodeResponse{PeerInfoJson: `{"endpoint": "stuff"}`}, nil + return &prototk.ActivatePeerResponse{PeerInfoJson: `{"endpoint": "stuff"}`}, nil }, - DeactivateNode: func(ctx context.Context, danr 
*prototk.DeactivateNodeRequest) (*prototk.DeactivateNodeResponse, error) { + DeactivatePeer: func(ctx context.Context, danr *prototk.DeactivatePeerRequest) (*prototk.DeactivatePeerResponse, error) { assert.Equal(t, "node1", danr.NodeName) - return &prototk.DeactivateNodeResponse{}, nil + return &prototk.DeactivatePeerResponse{}, nil }, } @@ -184,12 +184,12 @@ func TestTransportRequestsOK(t *testing.T) { assert.NotNil(t, smr) assert.Equal(t, "endpoint stuff", gldr.TransportDetails) - anr, err := transportAPI.ActivateNode(ctx, &prototk.ActivateNodeRequest{NodeName: "node1"}) + anr, err := transportAPI.ActivatePeer(ctx, &prototk.ActivatePeerRequest{NodeName: "node1"}) require.NoError(t, err) assert.NotNil(t, anr) assert.Equal(t, `{"endpoint": "stuff"}`, anr.PeerInfoJson) - danr, err := transportAPI.DeactivateNode(ctx, &prototk.DeactivateNodeRequest{NodeName: "node1"}) + danr, err := transportAPI.DeactivatePeer(ctx, &prototk.DeactivatePeerRequest{NodeName: "node1"}) require.NoError(t, err) assert.NotNil(t, danr) diff --git a/core/go/internal/privatetxnmgr/transaction_flow_mutators.go b/core/go/internal/privatetxnmgr/transaction_flow_mutators.go index 9b14c99d2..a1b40c3f0 100644 --- a/core/go/internal/privatetxnmgr/transaction_flow_mutators.go +++ b/core/go/internal/privatetxnmgr/transaction_flow_mutators.go @@ -112,16 +112,8 @@ func (tf *transactionFlow) applyTransactionAssembledEvent(ctx context.Context, e } tf.status = "assembled" tf.writeAndLockStates(ctx) - //allow assembly thread to proceed - _, err := tf.GetStateDistributions(ctx) - if err != nil { - log.L(ctx).Errorf("Error getting state distributions: %s", err) - // we need to proceed with unblocking the assembleCoordinator. It wont have a chance to distribute the states to the remote assembler nodes - // so they may fail to assemble or may assemble a transaction that does not get endorsed but that is always a possibility anyway and the - // engine's retry strategy and the eventually consistent distribution of states will mean we will eventually process - // all transactions if they are valid - } + //allow assembly thread to proceed tf.assembleCoordinator.Complete(event.AssembleRequestID) } diff --git a/core/go/internal/transportmgr/peer.go b/core/go/internal/transportmgr/peer.go index c35fe1b0d..794467a3d 100644 --- a/core/go/internal/transportmgr/peer.go +++ b/core/go/internal/transportmgr/peer.go @@ -129,7 +129,7 @@ func (tm *transportManager) reapPeer(p *peer) { if p.senderStarted.Load() { // Holding the lock while activating/deactivating ensures we never dual-activate in the transport - if _, err := p.transport.api.DeactivateNode(p.ctx, &prototk.DeactivateNodeRequest{ + if _, err := p.transport.api.DeactivatePeer(p.ctx, &prototk.DeactivatePeerRequest{ NodeName: p.Name, }); err != nil { log.L(p.ctx).Warnf("peer %s returned deactivation error: %s", p.Name, err) @@ -228,7 +228,7 @@ func (p *peer) startSender() (string, error) { } // Activate the connection (the deactivate is deferred to the send loop) - res, err := p.transport.api.ActivateNode(p.ctx, &prototk.ActivateNodeRequest{ + res, err := p.transport.api.ActivatePeer(p.ctx, &prototk.ActivatePeerRequest{ NodeName: p.Name, TransportDetails: remoteTransportDetails, }) diff --git a/core/go/internal/transportmgr/peer_test.go b/core/go/internal/transportmgr/peer_test.go index 093cf63c1..e2b204cd8 100644 --- a/core/go/internal/transportmgr/peer_test.go +++ b/core/go/internal/transportmgr/peer_test.go @@ -344,7 +344,7 @@ func TestActivateFail(t *testing.T) { ctx, tm, tp, done := 
newTestTransport(t, false, mockGoodTransport) defer done() - tp.Functions.ActivateNode = func(ctx context.Context, anr *prototk.ActivateNodeRequest) (*prototk.ActivateNodeResponse, error) { + tp.Functions.ActivatePeer = func(ctx context.Context, anr *prototk.ActivatePeerRequest) (*prototk.ActivatePeerResponse, error) { return nil, fmt.Errorf("pop") } @@ -358,8 +358,8 @@ func TestActivateBadPeerInfo(t *testing.T) { ctx, tm, tp, done := newTestTransport(t, false, mockGoodTransport) defer done() - tp.Functions.ActivateNode = func(ctx context.Context, anr *prototk.ActivateNodeRequest) (*prototk.ActivateNodeResponse, error) { - return &prototk.ActivateNodeResponse{PeerInfoJson: "!{ not valid JSON"}, nil + tp.Functions.ActivatePeer = func(ctx context.Context, anr *prototk.ActivatePeerRequest) (*prototk.ActivatePeerResponse, error) { + return &prototk.ActivatePeerResponse{PeerInfoJson: "!{ not valid JSON"}, nil } p, err := tm.getPeer(ctx, "node2", true) @@ -380,10 +380,10 @@ func TestDeactivateFail(t *testing.T) { tm.peerInactivityTimeout = 1 * time.Second tm.quiesceTimeout = 1 * time.Millisecond - tp.Functions.ActivateNode = func(ctx context.Context, anr *prototk.ActivateNodeRequest) (*prototk.ActivateNodeResponse, error) { - return &prototk.ActivateNodeResponse{PeerInfoJson: `{"endpoint":"some.url"}`}, nil + tp.Functions.ActivatePeer = func(ctx context.Context, anr *prototk.ActivatePeerRequest) (*prototk.ActivatePeerResponse, error) { + return &prototk.ActivatePeerResponse{PeerInfoJson: `{"endpoint":"some.url"}`}, nil } - tp.Functions.DeactivateNode = func(ctx context.Context, dnr *prototk.DeactivateNodeRequest) (*prototk.DeactivateNodeResponse, error) { + tp.Functions.DeactivatePeer = func(ctx context.Context, dnr *prototk.DeactivatePeerRequest) (*prototk.DeactivatePeerResponse, error) { return nil, fmt.Errorf("pop") } diff --git a/core/go/internal/transportmgr/transport_test.go b/core/go/internal/transportmgr/transport_test.go index 9afd062e3..c1048da05 100644 --- a/core/go/internal/transportmgr/transport_test.go +++ b/core/go/internal/transportmgr/transport_test.go @@ -129,11 +129,11 @@ func mockEmptyReliableMsgs(mc *mockComponents, conf *pldconf.TransportManagerCon } func mockActivateDeactivateOk(tp *testPlugin) { - tp.Functions.ActivateNode = func(ctx context.Context, anr *prototk.ActivateNodeRequest) (*prototk.ActivateNodeResponse, error) { - return &prototk.ActivateNodeResponse{PeerInfoJson: `{"endpoint":"some.url"}`}, nil + tp.Functions.ActivatePeer = func(ctx context.Context, anr *prototk.ActivatePeerRequest) (*prototk.ActivatePeerResponse, error) { + return &prototk.ActivatePeerResponse{PeerInfoJson: `{"endpoint":"some.url"}`}, nil } - tp.Functions.DeactivateNode = func(ctx context.Context, dnr *prototk.DeactivateNodeRequest) (*prototk.DeactivateNodeResponse, error) { - return &prototk.DeactivateNodeResponse{}, nil + tp.Functions.DeactivatePeer = func(ctx context.Context, dnr *prototk.DeactivatePeerRequest) (*prototk.DeactivatePeerResponse, error) { + return &prototk.DeactivatePeerResponse{}, nil } } diff --git a/toolkit/go/pkg/plugintk/plugin_type_transport.go b/toolkit/go/pkg/plugintk/plugin_type_transport.go index 1c7b3ffa1..dc9fae1a3 100644 --- a/toolkit/go/pkg/plugintk/plugin_type_transport.go +++ b/toolkit/go/pkg/plugintk/plugin_type_transport.go @@ -28,8 +28,8 @@ type TransportAPI interface { ConfigureTransport(context.Context, *prototk.ConfigureTransportRequest) (*prototk.ConfigureTransportResponse, error) SendMessage(context.Context, *prototk.SendMessageRequest) 
(*prototk.SendMessageResponse, error) GetLocalDetails(context.Context, *prototk.GetLocalDetailsRequest) (*prototk.GetLocalDetailsResponse, error) - ActivateNode(context.Context, *prototk.ActivateNodeRequest) (*prototk.ActivateNodeResponse, error) - DeactivateNode(context.Context, *prototk.DeactivateNodeRequest) (*prototk.DeactivateNodeResponse, error) + ActivatePeer(context.Context, *prototk.ActivatePeerRequest) (*prototk.ActivatePeerResponse, error) + DeactivatePeer(context.Context, *prototk.DeactivatePeerRequest) (*prototk.DeactivatePeerResponse, error) } type TransportCallbacks interface { @@ -130,13 +130,13 @@ func (th *transportHandler) RequestToPlugin(ctx context.Context, iReq PluginMess resMsg := &prototk.TransportMessage_GetLocalDetailsRes{} resMsg.GetLocalDetailsRes, err = th.api.GetLocalDetails(ctx, input.GetLocalDetails) res.ResponseFromTransport = resMsg - case *prototk.TransportMessage_ActivateNode: - resMsg := &prototk.TransportMessage_ActivateNodeRes{} - resMsg.ActivateNodeRes, err = th.api.ActivateNode(ctx, input.ActivateNode) + case *prototk.TransportMessage_ActivatePeer: + resMsg := &prototk.TransportMessage_ActivatePeerRes{} + resMsg.ActivatePeerRes, err = th.api.ActivatePeer(ctx, input.ActivatePeer) res.ResponseFromTransport = resMsg - case *prototk.TransportMessage_DeactivateNode: - resMsg := &prototk.TransportMessage_DeactivateNodeRes{} - resMsg.DeactivateNodeRes, err = th.api.DeactivateNode(ctx, input.DeactivateNode) + case *prototk.TransportMessage_DeactivatePeer: + resMsg := &prototk.TransportMessage_DeactivatePeerRes{} + resMsg.DeactivatePeerRes, err = th.api.DeactivatePeer(ctx, input.DeactivatePeer) res.ResponseFromTransport = resMsg default: err = i18n.NewError(ctx, tkmsgs.MsgPluginUnsupportedRequest, input) @@ -170,8 +170,8 @@ type TransportAPIFunctions struct { ConfigureTransport func(context.Context, *prototk.ConfigureTransportRequest) (*prototk.ConfigureTransportResponse, error) SendMessage func(context.Context, *prototk.SendMessageRequest) (*prototk.SendMessageResponse, error) GetLocalDetails func(context.Context, *prototk.GetLocalDetailsRequest) (*prototk.GetLocalDetailsResponse, error) - ActivateNode func(context.Context, *prototk.ActivateNodeRequest) (*prototk.ActivateNodeResponse, error) - DeactivateNode func(context.Context, *prototk.DeactivateNodeRequest) (*prototk.DeactivateNodeResponse, error) + ActivatePeer func(context.Context, *prototk.ActivatePeerRequest) (*prototk.ActivatePeerResponse, error) + DeactivatePeer func(context.Context, *prototk.DeactivatePeerRequest) (*prototk.DeactivatePeerResponse, error) } type TransportAPIBase struct { @@ -190,10 +190,10 @@ func (tb *TransportAPIBase) GetLocalDetails(ctx context.Context, req *prototk.Ge return callPluginImpl(ctx, req, tb.Functions.GetLocalDetails) } -func (tb *TransportAPIBase) ActivateNode(ctx context.Context, req *prototk.ActivateNodeRequest) (*prototk.ActivateNodeResponse, error) { - return callPluginImpl(ctx, req, tb.Functions.ActivateNode) +func (tb *TransportAPIBase) ActivatePeer(ctx context.Context, req *prototk.ActivatePeerRequest) (*prototk.ActivatePeerResponse, error) { + return callPluginImpl(ctx, req, tb.Functions.ActivatePeer) } -func (tb *TransportAPIBase) DeactivateNode(ctx context.Context, req *prototk.DeactivateNodeRequest) (*prototk.DeactivateNodeResponse, error) { - return callPluginImpl(ctx, req, tb.Functions.DeactivateNode) +func (tb *TransportAPIBase) DeactivatePeer(ctx context.Context, req *prototk.DeactivatePeerRequest) (*prototk.DeactivatePeerResponse, error) { + return 
callPluginImpl(ctx, req, tb.Functions.DeactivatePeer) } diff --git a/toolkit/go/pkg/plugintk/plugin_type_transport_test.go b/toolkit/go/pkg/plugintk/plugin_type_transport_test.go index 155e54483..ae7b5b587 100644 --- a/toolkit/go/pkg/plugintk/plugin_type_transport_test.go +++ b/toolkit/go/pkg/plugintk/plugin_type_transport_test.go @@ -139,37 +139,37 @@ func TestTransportFunction_GetLocalDetails(t *testing.T) { }) } -func TestTransportFunction_ActivateNode(t *testing.T) { +func TestTransportFunction_ActivatePeer(t *testing.T) { _, exerciser, funcs, _, _, done := setupTransportTests(t) defer done() // InitTransport - paladin to transport - funcs.ActivateNode = func(ctx context.Context, cdr *prototk.ActivateNodeRequest) (*prototk.ActivateNodeResponse, error) { - return &prototk.ActivateNodeResponse{}, nil + funcs.ActivatePeer = func(ctx context.Context, cdr *prototk.ActivatePeerRequest) (*prototk.ActivatePeerResponse, error) { + return &prototk.ActivatePeerResponse{}, nil } exerciser.doExchangeToPlugin(func(req *prototk.TransportMessage) { - req.RequestToTransport = &prototk.TransportMessage_ActivateNode{ - ActivateNode: &prototk.ActivateNodeRequest{}, + req.RequestToTransport = &prototk.TransportMessage_ActivatePeer{ + ActivatePeer: &prototk.ActivatePeerRequest{}, } }, func(res *prototk.TransportMessage) { - assert.IsType(t, &prototk.TransportMessage_ActivateNodeRes{}, res.ResponseFromTransport) + assert.IsType(t, &prototk.TransportMessage_ActivatePeerRes{}, res.ResponseFromTransport) }) } -func TestTransportFunction_DeactivateNode(t *testing.T) { +func TestTransportFunction_DeactivatePeer(t *testing.T) { _, exerciser, funcs, _, _, done := setupTransportTests(t) defer done() // InitTransport - paladin to transport - funcs.DeactivateNode = func(ctx context.Context, cdr *prototk.DeactivateNodeRequest) (*prototk.DeactivateNodeResponse, error) { - return &prototk.DeactivateNodeResponse{}, nil + funcs.DeactivatePeer = func(ctx context.Context, cdr *prototk.DeactivatePeerRequest) (*prototk.DeactivatePeerResponse, error) { + return &prototk.DeactivatePeerResponse{}, nil } exerciser.doExchangeToPlugin(func(req *prototk.TransportMessage) { - req.RequestToTransport = &prototk.TransportMessage_DeactivateNode{ - DeactivateNode: &prototk.DeactivateNodeRequest{}, + req.RequestToTransport = &prototk.TransportMessage_DeactivatePeer{ + DeactivatePeer: &prototk.DeactivatePeerRequest{}, } }, func(res *prototk.TransportMessage) { - assert.IsType(t, &prototk.TransportMessage_DeactivateNodeRes{}, res.ResponseFromTransport) + assert.IsType(t, &prototk.TransportMessage_DeactivatePeerRes{}, res.ResponseFromTransport) }) } diff --git a/toolkit/proto/protos/service.proto b/toolkit/proto/protos/service.proto index 387588e30..f9983c4de 100644 --- a/toolkit/proto/protos/service.proto +++ b/toolkit/proto/protos/service.proto @@ -105,16 +105,16 @@ message TransportMessage { ConfigureTransportRequest configure_transport = 1010; SendMessageRequest send_message = 1020; GetLocalDetailsRequest get_local_details = 1030; - ActivateNodeRequest activate_node = 1040; - DeactivateNodeRequest deactivate_node = 1050; + ActivatePeerRequest activate_peer = 1040; + DeactivatePeerRequest deactivate_peer = 1050; } oneof response_from_transport { ConfigureTransportResponse configure_transport_res = 1011; SendMessageResponse send_message_res = 1021; GetLocalDetailsResponse get_local_details_res = 1031; - ActivateNodeResponse activate_node_res = 1041; - DeactivateNodeResponse deactivate_node_res = 1051; + ActivatePeerResponse activate_peer_res 
= 1041; + DeactivatePeerResponse deactivate_peer_res = 1051; } // Request/reply exchanges initiated by the transport, to the paladin node diff --git a/toolkit/proto/protos/to_transport.proto b/toolkit/proto/protos/to_transport.proto index 967698d8e..16a600c17 100644 --- a/toolkit/proto/protos/to_transport.proto +++ b/toolkit/proto/protos/to_transport.proto @@ -33,20 +33,20 @@ message SendMessageRequest { message SendMessageResponse { } -message ActivateNodeRequest { +message ActivatePeerRequest { string node_name = 1; string transport_details = 2; } -message ActivateNodeResponse { +message ActivatePeerResponse { string peer_info_json = 1; } -message DeactivateNodeRequest { +message DeactivatePeerRequest { string node_name = 1; } -message DeactivateNodeResponse { +message DeactivatePeerResponse { } message GetLocalDetailsRequest { diff --git a/transports/grpc/internal/grpctransport/grpc_transport.go b/transports/grpc/internal/grpctransport/grpc_transport.go index 5e358e6c9..e38d70f2e 100644 --- a/transports/grpc/internal/grpctransport/grpc_transport.go +++ b/transports/grpc/internal/grpctransport/grpc_transport.go @@ -224,7 +224,7 @@ func (t *grpcTransport) getTransportDetails(ctx context.Context, node string) (t return transportDetails, nil } -func (t *grpcTransport) ActivateNode(ctx context.Context, req *prototk.ActivateNodeRequest) (*prototk.ActivateNodeResponse, error) { +func (t *grpcTransport) ActivatePeer(ctx context.Context, req *prototk.ActivatePeerRequest) (*prototk.ActivatePeerResponse, error) { t.connLock.Lock() defer t.connLock.Unlock() @@ -240,12 +240,12 @@ func (t *grpcTransport) ActivateNode(ctx context.Context, req *prototk.ActivateN return nil, err } t.outboundConnections[req.NodeName] = oc - return &prototk.ActivateNodeResponse{ + return &prototk.ActivatePeerResponse{ PeerInfoJson: string(peerInfoJSON), }, nil } -func (t *grpcTransport) DeactivateNode(ctx context.Context, req *prototk.DeactivateNodeRequest) (*prototk.DeactivateNodeResponse, error) { +func (t *grpcTransport) DeactivatePeer(ctx context.Context, req *prototk.DeactivatePeerRequest) (*prototk.DeactivatePeerResponse, error) { t.connLock.Lock() defer t.connLock.Unlock() @@ -257,7 +257,7 @@ func (t *grpcTransport) DeactivateNode(ctx context.Context, req *prototk.Deactiv delete(t.outboundConnections, req.NodeName) } - return &prototk.DeactivateNodeResponse{}, nil + return &prototk.DeactivatePeerResponse{}, nil } func (t *grpcTransport) getConnection(nodeName string) *outboundConn { diff --git a/transports/grpc/internal/grpctransport/grpc_transport_test.go b/transports/grpc/internal/grpctransport/grpc_transport_test.go index ce949b7d0..972b143bf 100644 --- a/transports/grpc/internal/grpctransport/grpc_transport_test.go +++ b/transports/grpc/internal/grpctransport/grpc_transport_test.go @@ -204,7 +204,7 @@ func TestActivateBadTransportDetails(t *testing.T) { }) defer done() - _, err := plugin1.ActivateNode(ctx, &prototk.ActivateNodeRequest{ + _, err := plugin1.ActivatePeer(ctx, &prototk.ActivatePeerRequest{ NodeName: "node2", TransportDetails: `{"endpoint": false}`, }) @@ -219,7 +219,7 @@ func TestConnectBadTransport(t *testing.T) { plugin1, _, done := newSuccessfulVerifiedConnection(t) defer done() - _, err := plugin1.ActivateNode(ctx, &prototk.ActivateNodeRequest{ + _, err := plugin1.ActivatePeer(ctx, &prototk.ActivatePeerRequest{ NodeName: "node2", TransportDetails: `{"endpoint": "WRONG:::::::"}`, }) diff --git a/transports/grpc/internal/grpctransport/tls_verifier_test.go 
b/transports/grpc/internal/grpctransport/tls_verifier_test.go index 3bf4fca1d..16006d477 100644 --- a/transports/grpc/internal/grpctransport/tls_verifier_test.go +++ b/transports/grpc/internal/grpctransport/tls_verifier_test.go @@ -175,7 +175,7 @@ func newSuccessfulVerifiedConnection(t *testing.T, setup ...func(callbacks1, cal fn(callbacks1, callbacks2) } - deactivate := testActivateNode(t, plugin1, "node2", transportDetails2) + deactivate := testActivatePeer(t, plugin1, "node2", transportDetails2) return plugin1, plugin2, func() { deactivate() @@ -184,11 +184,11 @@ func newSuccessfulVerifiedConnection(t *testing.T, setup ...func(callbacks1, cal } } -func testActivateNode(t *testing.T, sender *grpcTransport, remoteNodeName string, transportDetails *PublishedTransportDetails) func() { +func testActivatePeer(t *testing.T, sender *grpcTransport, remoteNodeName string, transportDetails *PublishedTransportDetails) func() { ctx := context.Background() - res, err := sender.ActivateNode(ctx, &prototk.ActivateNodeRequest{ + res, err := sender.ActivatePeer(ctx, &prototk.ActivatePeerRequest{ NodeName: remoteNodeName, TransportDetails: tktypes.JSONString(transportDetails).Pretty(), }) @@ -196,7 +196,7 @@ func testActivateNode(t *testing.T, sender *grpcTransport, remoteNodeName string assert.NotNil(t, res) return func() { - res, err := sender.DeactivateNode(ctx, &prototk.DeactivateNodeRequest{ + res, err := sender.DeactivatePeer(ctx, &prototk.DeactivatePeerRequest{ NodeName: remoteNodeName, }) assert.NoError(t, err) @@ -269,7 +269,7 @@ func TestGRPCTransport_DirectCertVerificationWithKeyRotation_OK(t *testing.T) { } // Connect and send from plugin1 to plugin2 - deactivate := testActivateNode(t, plugin1, "node2", transportDetails2) + deactivate := testActivatePeer(t, plugin1, "node2", transportDetails2) defer deactivate() sendRes, err := plugin1.SendMessage(ctx, &prototk.SendMessageRequest{ Node: "node2", @@ -325,7 +325,7 @@ func TestGRPCTransport_CACertVerificationWithSubjectRegex_OK(t *testing.T) { mockRegistry(callbacks2, ptds) // Connect and send from plugin1 to plugin2 - deactivate := testActivateNode(t, plugin1, "node2", transportDetails2) + deactivate := testActivatePeer(t, plugin1, "node2", transportDetails2) defer deactivate() sendRes, err := plugin1.SendMessage(ctx, &prototk.SendMessageRequest{ Node: "node2", @@ -371,7 +371,7 @@ func TestGRPCTransport_CAServerWrongCA(t *testing.T) { mockRegistry(callbacks1, ptds) mockRegistry(callbacks2, ptds) - _, err = plugin1.ActivateNode(ctx, &prototk.ActivateNodeRequest{ + _, err = plugin1.ActivatePeer(ctx, &prototk.ActivatePeerRequest{ NodeName: "node2", TransportDetails: tktypes.JSONString(transportDetails2).Pretty(), }) @@ -408,7 +408,7 @@ func TestGRPCTransport_CAClientWrongCA(t *testing.T) { mockRegistry(callbacks1, ptds) mockRegistry(callbacks2, ptds) - _, err = plugin1.ActivateNode(ctx, &prototk.ActivateNodeRequest{ + _, err = plugin1.ActivatePeer(ctx, &prototk.ActivatePeerRequest{ NodeName: "node2", TransportDetails: tktypes.JSONString(transportDetails2).Pretty(), }) @@ -436,7 +436,7 @@ func TestGRPCTransport_DirectCertVerification_WrongIssuerServer(t *testing.T) { mockRegistry(callbacks1, ptds) mockRegistry(callbacks2, ptds) - _, err := plugin1.ActivateNode(ctx, &prototk.ActivateNodeRequest{ + _, err := plugin1.ActivatePeer(ctx, &prototk.ActivatePeerRequest{ NodeName: "node2", TransportDetails: tktypes.JSONString(transportDetails2).Pretty(), }) @@ -464,7 +464,7 @@ func TestGRPCTransport_DirectCertVerification_WrongIssuerClient(t *testing.T) { 
mockRegistry(callbacks1, ptds) mockRegistry(callbacks2, ptds) - _, err := plugin1.ActivateNode(ctx, &prototk.ActivateNodeRequest{ + _, err := plugin1.ActivatePeer(ctx, &prototk.ActivatePeerRequest{ NodeName: "node2", TransportDetails: tktypes.JSONString(transportDetails2).Pretty(), }) @@ -489,7 +489,7 @@ func TestGRPCTransport_DirectCertVerification_BadIssuersServer(t *testing.T) { mockRegistry(callbacks1, ptds) mockRegistry(callbacks2, ptds) - _, err := plugin1.ActivateNode(ctx, &prototk.ActivateNodeRequest{ + _, err := plugin1.ActivatePeer(ctx, &prototk.ActivatePeerRequest{ NodeName: "node2", TransportDetails: tktypes.JSONString(transportDetails2).Pretty(), }) @@ -515,7 +515,7 @@ func TestGRPCTransport_SubjectRegexpMismatch(t *testing.T) { mockRegistry(callbacks1, ptds) mockRegistry(callbacks2, ptds) - _, err := plugin1.ActivateNode(ctx, &prototk.ActivateNodeRequest{ + _, err := plugin1.ActivatePeer(ctx, &prototk.ActivatePeerRequest{ NodeName: "node2", TransportDetails: tktypes.JSONString(transportDetails2).Pretty(), }) @@ -539,7 +539,7 @@ func TestGRPCTransport_ClientWrongNode(t *testing.T) { mockRegistry(callbacks1, ptds) mockRegistry(callbacks2, ptds) - _, err := plugin1.ActivateNode(ctx, &prototk.ActivateNodeRequest{ + _, err := plugin1.ActivatePeer(ctx, &prototk.ActivatePeerRequest{ NodeName: "node3", TransportDetails: tktypes.JSONString(transportDetails2).Pretty(), }) @@ -567,7 +567,7 @@ func TestGRPCTransport_BadTransportDetails(t *testing.T) { ptds := map[string]*PublishedTransportDetails{"node1": transportDetails1, "node2": transportDetails2} mockRegistry(callbacks2, ptds) - _, err := plugin1.ActivateNode(ctx, &prototk.ActivateNodeRequest{ + _, err := plugin1.ActivatePeer(ctx, &prototk.ActivatePeerRequest{ NodeName: "node2", TransportDetails: tktypes.JSONString(transportDetails2).Pretty(), }) @@ -593,7 +593,7 @@ func TestGRPCTransport_BadTransportIssuerPEM(t *testing.T) { mockRegistry(callbacks1, ptds) mockRegistry(callbacks2, ptds) - _, err := plugin1.ActivateNode(ctx, &prototk.ActivateNodeRequest{ + _, err := plugin1.ActivatePeer(ctx, &prototk.ActivatePeerRequest{ NodeName: "node2", TransportDetails: tktypes.JSONString(transportDetails2).Pretty(), }) @@ -617,7 +617,7 @@ func TestGRPCTransport_NodeUnknownToServer(t *testing.T) { mockRegistry(callbacks1, ptds) mockRegistry(callbacks2, map[string]*PublishedTransportDetails{}) - _, err := plugin1.ActivateNode(ctx, &prototk.ActivateNodeRequest{ + _, err := plugin1.ActivatePeer(ctx, &prototk.ActivatePeerRequest{ NodeName: "node2", TransportDetails: tktypes.JSONString(transportDetails2).Pretty(), }) @@ -641,7 +641,7 @@ func TestGRPCTransport_NodeUnknownToClient(t *testing.T) { mockRegistry(callbacks1, ptds) mockRegistry(callbacks2, ptds) - _, err := plugin1.ActivateNode(ctx, &prototk.ActivateNodeRequest{ + _, err := plugin1.ActivatePeer(ctx, &prototk.ActivatePeerRequest{ NodeName: "node2", TransportDetails: tktypes.JSONString(transportDetails2).Pretty(), }) @@ -666,7 +666,7 @@ func TestGRPCTransport_ServerRejectNoCerts(t *testing.T) { mockRegistry(callbacks1, ptds) mockRegistry(callbacks2, ptds) - _, err := plugin1.ActivateNode(ctx, &prototk.ActivateNodeRequest{ + _, err := plugin1.ActivatePeer(ctx, &prototk.ActivatePeerRequest{ NodeName: "node2", TransportDetails: tktypes.JSONString(transportDetails2).Pretty(), }) From 98619a5ca146b285c17c9434127795b82a546a08 Mon Sep 17 00:00:00 2001 From: Peter Broadhurst Date: Mon, 6 Jan 2025 14:03:52 -0500 Subject: [PATCH 39/41] Remove map order indeterminism in test Signed-off-by: Peter 
Broadhurst --- .../internal/statemgr/domain_context_test.go | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/core/go/internal/statemgr/domain_context_test.go b/core/go/internal/statemgr/domain_context_test.go index 89e12c38d..21ff0c22c 100644 --- a/core/go/internal/statemgr/domain_context_test.go +++ b/core/go/internal/statemgr/domain_context_test.go @@ -1114,7 +1114,6 @@ func TestExportSnapshot(t *testing.T) { transactionID1 := uuid.New() transactionID2 := uuid.New() - transactionID3 := uuid.New() _, err = dc.UpsertStates( ss.p.DB(), @@ -1124,12 +1123,6 @@ func TestExportSnapshot(t *testing.T) { Data: s1.Data, CreatedBy: &transactionID1, }, - &components.StateUpsert{ - ID: s2.ID, - Schema: schema2.ID(), - Data: s2.Data, - CreatedBy: &transactionID2, - }, ) require.NoError(t, err) @@ -1137,7 +1130,7 @@ func TestExportSnapshot(t *testing.T) { &pldapi.StateLock{ Type: pldapi.StateLockTypeSpend.Enum(), StateID: s2.ID, - Transaction: transactionID3, + Transaction: transactionID2, }, ) assert.NoError(t, err) @@ -1154,11 +1147,6 @@ func TestExportSnapshot(t *testing.T) { { "stateId":"`+s2.ID.String()+`", "transaction":"`+transactionID2.String()+`", - "type":"create" - }, - { - "stateId":"`+s2.ID.String()+`", - "transaction":"`+transactionID3.String()+`", "type":"spend" } ], @@ -1167,11 +1155,6 @@ func TestExportSnapshot(t *testing.T) { "id": "`+s1.ID.String()+`", "schema": "`+s1.Schema.String()+`", "data": `+s1.Data.String()+` - }, - { - "id": "`+s2.ID.String()+`", - "schema": "`+s2.Schema.String()+`", - "data": `+s2.Data.String()+` } ] }`, string(json), From 3f43002d704e7b4ca5f6b1627d6f5844927b0e7f Mon Sep 17 00:00:00 2001 From: Andrew Richardson Date: Wed, 8 Jan 2025 11:04:58 -0500 Subject: [PATCH 40/41] Requesting node may not be the one that prepares a transaction Fix the bond example to retrieve prepared transactions from the node that prepared them, which may not be the node that requested them. 
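For illustration, a minimal sketch of the corrected pattern (the client names requestingNode and preparingNode are placeholders, not part of this change; the calls mirror the ones in the diff below). prepareTransfer now resolves to the transaction ID, and the caller polls for the prepared transaction on whichever node actually prepared it:

    // Submit the prepare request via the requesting node's client.
    const txID = await notoCash.using(requestingNode).prepareTransfer(investor, {
      to: bondCustodian,
      amount: 100,
      data: "0x",
    });
    // Poll on the node that prepared the transaction. For a transfer this is
    // the sender's node, which may not be the node that requested the prepare.
    const prepared = await preparingNode.pollForPreparedTransaction(txID, 10000);
    if (prepared === undefined) {
      throw new Error("prepared transaction not found within timeout");
    }
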
TODO: need to revisit if "prepare" is still a valid flow, as it's become complex Signed-off-by: Andrew Richardson --- example/bond/src/index.ts | 29 +++++++++++++++-------------- sdk/typescript/src/domains/noto.ts | 8 ++------ 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/example/bond/src/index.ts b/example/bond/src/index.ts index 2b18c9bfc..da0b51032 100644 --- a/example/bond/src/index.ts +++ b/example/bond/src/index.ts @@ -192,13 +192,15 @@ async function main(): Promise { // Prepare the payment transfer (investor -> custodian) logger.log("Preparing payment transfer..."); - const paymentTransfer = await notoCash - .using(paladin3) - .prepareTransfer(investor, { - to: bondCustodian, - amount: 100, - data: "0x", - }); + txID = await notoCash.using(paladin3).prepareTransfer(investor, { + to: bondCustodian, + amount: 100, + data: "0x", + }); + const paymentTransfer = await paladin1.pollForPreparedTransaction( + txID, + 10000 + ); if (paymentTransfer === undefined) { logger.error("Failed!"); return false; @@ -213,13 +215,12 @@ async function main(): Promise { // Prepare the bond transfer (custodian -> investor) // Requires 2 calls to prepare, as the Noto transaction spawns a Pente transaction to wrap it logger.log("Preparing bond transfer (step 1/2)..."); - const bondTransfer1 = await notoBond - .using(paladin2) - .prepareTransfer(bondCustodian, { - to: investor, - amount: 100, - data: "0x", - }); + txID = await notoBond.using(paladin2).prepareTransfer(bondCustodian, { + to: investor, + amount: 100, + data: "0x", + }); + const bondTransfer1 = await paladin2.pollForPreparedTransaction(txID, 10000); if (bondTransfer1 === undefined) { logger.error("Failed!"); return false; diff --git a/sdk/typescript/src/domains/noto.ts b/sdk/typescript/src/domains/noto.ts index 351eb53e0..6ec1cf0bd 100644 --- a/sdk/typescript/src/domains/noto.ts +++ b/sdk/typescript/src/domains/noto.ts @@ -160,8 +160,8 @@ export class NotoInstance { return this.paladin.pollForReceipt(txID, this.options.pollTimeout); } - async prepareTransfer(from: PaladinVerifier, data: NotoTransferParams) { - const txID = await this.paladin.prepareTransaction({ + prepareTransfer(from: PaladinVerifier, data: NotoTransferParams) { + return this.paladin.prepareTransaction({ type: TransactionType.PRIVATE, abi: notoPrivateJSON.abi, function: "transfer", @@ -172,10 +172,6 @@ export class NotoInstance { to: data.to.lookup, }, }); - return this.paladin.pollForPreparedTransaction( - txID, - this.options.pollTimeout - ); } async approveTransfer( From a58fe5d2a3776702d4e514ebc844d5c8ea37c767 Mon Sep 17 00:00:00 2001 From: Andrew Richardson Date: Wed, 8 Jan 2025 11:12:58 -0500 Subject: [PATCH 41/41] pente: propagate required signer for prepared transactions Signed-off-by: Andrew Richardson --- .../java/io/kaleido/paladin/pente/domain/PenteDomain.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/domains/pente/src/main/java/io/kaleido/paladin/pente/domain/PenteDomain.java b/domains/pente/src/main/java/io/kaleido/paladin/pente/domain/PenteDomain.java index b6254ae09..b44b68c30 100644 --- a/domains/pente/src/main/java/io/kaleido/paladin/pente/domain/PenteDomain.java +++ b/domains/pente/src/main/java/io/kaleido/paladin/pente/domain/PenteDomain.java @@ -403,10 +403,11 @@ protected CompletableFuture prepareTransact var transitionTX = ToDomain.PreparedTransaction.newBuilder(). setFunctionAbiJson(transitionABI). 
setParamsJson(new ObjectMapper().writeValueAsString(params)); - var result = ToDomain.PrepareTransactionResponse.newBuilder(). - setTransaction(transitionTX); + var result = ToDomain.PrepareTransactionResponse.newBuilder(); if (request.getTransaction().getIntent() == ToDomain.TransactionSpecification.Intent.PREPARE_TRANSACTION) { + transitionTX.setRequiredSigner(request.getTransaction().getFrom()); + // TODO: can the transitionHash be reused from a prior step instead of being computed again? var tx = new PenteTransaction(this, request.getTransaction()); var transitionHash = tx.eip712TypedDataEndorsementPayload( @@ -442,6 +443,8 @@ protected CompletableFuture prepareTransact result.setMetadata(new ObjectMapper().writeValueAsString(metadata)); } + + result.setTransaction(transitionTX); return CompletableFuture.completedFuture(result.build()); } catch (Exception e) { return CompletableFuture.failedFuture(e);