Skip to content

Commit

Permalink
fix: start physical replication return unexpected messages
Browse files Browse the repository at this point in the history
Physical streaming replication returns the timeline-switch result directly if the start point is a historical timeline switch point. In that case the server does not even enter copy-both mode.

Closes: #60
  • Loading branch information
krisdiano authored and jackc committed Nov 11, 2023
1 parent 9ed16cb commit 1627ab1
Showing 1 changed file with 52 additions and 0 deletions.
52 changes: 52 additions & 0 deletions pglogrepl.go
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,28 @@ type StartReplicationOptions struct {
PluginArgs []string
}

// errEndTimeline is the error value StartReplication returns when the server
// replies to START_REPLICATION with a (next_tli, next_tli_startpos) result row
// followed by ReadyForQuery, rather than with a CopyBothResponse.
type errEndTimeline struct {
// nextTli holds the next_tli column of that row, parsed as a base-10 integer.
// NOTE(review): presumably the ID of the timeline to continue on - confirm.
nextTli int64
// nextTliStartpos holds the next_tli_startpos column, parsed with ParseLSN.
// NOTE(review): presumably the LSN at which that timeline begins - confirm.
nextTliStartpos LSN
}

// Error implements the error interface. The text is fixed: it does not
// include the next timeline value or its start position, which are
// available through the ErrEndTimeline method instead.
func (e errEndTimeline) Error() string {
	const msg = "start replication with a switch point"
	return msg
}

// ErrEndTimeline exposes the two values carried by the error: first the
// next timeline value, then the start position recorded for it. It is the
// method IsErrEndTimeline looks for when classifying an error.
func (e errEndTimeline) ErrEndTimeline() (int64, LSN) {
	tli, startPos := e.nextTli, e.nextTliStartpos
	return tli, startPos
}

// IsErrEndTimeline reports whether err, or any error reachable from it
// through a chain of Unwrap() error methods, carries end-of-timeline
// information, i.e. exposes an ErrEndTimeline() (int64, LSN) method.
//
// On a match it returns the two values produced by that method and true.
// Otherwise, including when err is nil, it returns 0, 0, false.
//
// Earlier behaviour only inspected err itself, so an end-of-timeline error
// that a caller had wrapped (for example with fmt.Errorf and %w) was not
// recognised. Errors that match directly behave exactly as before.
func IsErrEndTimeline(err error) (int64, LSN, bool) {
	type endTimeliner interface {
		ErrEndTimeline() (int64, LSN)
	}
	type unwrapper interface {
		Unwrap() error
	}

	// The chain is walked by hand so the function needs no additional
	// imports. Only single-error wrapping (Unwrap() error) is followed.
	for err != nil {
		if e, ok := err.(endTimeliner); ok {
			nextTli, nextTliStartpos := e.ErrEndTimeline()
			return nextTli, nextTliStartpos, true
		}
		u, ok := err.(unwrapper)
		if !ok {
			break
		}
		err = u.Unwrap()
	}
	return 0, 0, false
}

// StartReplication begins the replication process by executing the START_REPLICATION command.
func StartReplication(ctx context.Context, conn *pgconn.PgConn, slotName string, startLSN LSN, options StartReplicationOptions) error {
var timelineString string
Expand All @@ -303,6 +325,10 @@ func StartReplication(ctx context.Context, conn *pgconn.PgConn, slotName string,
return fmt.Errorf("failed to send START_REPLICATION: %w", err)
}

var (
nextTli int64
nextTliStartpos LSN
)
for {
msg, err := conn.ReceiveMessage(ctx)
if err != nil {
Expand All @@ -316,6 +342,32 @@ func StartReplication(ctx context.Context, conn *pgconn.PgConn, slotName string,
case *pgproto3.CopyBothResponse:
// This signals the start of the replication stream.
return nil
case *pgproto3.RowDescription:
if options.Mode != PhysicalReplication {
return fmt.Errorf("received row RowDescription message in logical replication")
}
if len(msg.Fields) != 2 || string(msg.Fields[0].Name) != "next_tli" || string(msg.Fields[1].Name) != "next_tli_startpos" {
return fmt.Errorf("expected next timeline row description message")
}
case *pgproto3.DataRow:
if cnt := len(msg.Values); cnt != 2 {
return fmt.Errorf("expected next_tli and next_tli_startpos, got %d fields", cnt)
}
tmpNextTli, tmpNextTliStartpos := string(msg.Values[0]), string(msg.Values[1])
nextTli, err = strconv.ParseInt(tmpNextTli, 10, 64)
if err != nil {
return err
}
nextTliStartpos, err = ParseLSN(tmpNextTliStartpos)
if err != nil {
return err
}
case *pgproto3.CommandComplete:
case *pgproto3.ReadyForQuery:
// if no next timeline switch result, maybe it was left on the connection
if nextTli > 0 && nextTliStartpos > 0 {
return errEndTimeline{nextTli: nextTli, nextTliStartpos: nextTliStartpos}
}
default:
return fmt.Errorf("unexpected response type: %T", msg)
}
Expand Down

0 comments on commit 1627ab1

Please sign in to comment.