From eb2eb6766536808a3f972654803d88f8501031f7 Mon Sep 17 00:00:00 2001 From: Eugene Kabanov Date: Sat, 11 Jan 2025 03:25:56 +0200 Subject: [PATCH] Various fixes for light forward syncing algorithm. (#6831) * Fix light forward syncing from finishing with inconsistent state. * Update copyright year. * Add block retention period check --- .../block_clearance.nim | 11 +++-- beacon_chain/nimbus_beacon_node.nim | 9 ++-- beacon_chain/sync/sync_overseer.nim | 47 +++++++++++++++++-- 3 files changed, 56 insertions(+), 11 deletions(-) diff --git a/beacon_chain/consensus_object_pools/block_clearance.nim b/beacon_chain/consensus_object_pools/block_clearance.nim index 1d010acec8..dededc0162 100644 --- a/beacon_chain/consensus_object_pools/block_clearance.nim +++ b/beacon_chain/consensus_object_pools/block_clearance.nim @@ -1,5 +1,5 @@ # beacon_chain -# Copyright (c) 2018-2024 Status Research & Development GmbH +# Copyright (c) 2018-2025 Status Research & Development GmbH # Licensed and distributed under either of # * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT). # * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0). @@ -509,7 +509,10 @@ proc addBackfillBlockData*( withBlck(parentBlock): forkyBlck.message.state_root clearanceBlock = BlockSlotId.init(parent.bid, forkyBlck.message.slot) - updateFlags1 = dag.updateFlags + {skipLastStateRootCalculation} + updateFlags1 = dag.updateFlags + # TODO (cheatfate): {skipLastStateRootCalculation} flag here could + # improve performance by 100%, but this approach needs some + # improvements, which is unclear. if not updateState(dag, dag.clearanceState, clearanceBlock, true, cache, updateFlags1): @@ -517,7 +520,9 @@ proc addBackfillBlockData*( "database corrupt?", clearanceBlock = shortLog(clearanceBlock) return err(VerifierError.MissingParent) - dag.clearanceState.setStateRoot(trustedStateRoot) + # dag.clearanceState.setStateRoot(trustedStateRoot) + # TODO (cheatfate): This is last part of previous TODO comment, we should + # set state's `root` to block's `state_root`. let proposerVerifyTick = Moment.now() diff --git a/beacon_chain/nimbus_beacon_node.nim b/beacon_chain/nimbus_beacon_node.nim index e0fbc91201..7e772037cd 100644 --- a/beacon_chain/nimbus_beacon_node.nim +++ b/beacon_chain/nimbus_beacon_node.nim @@ -381,11 +381,14 @@ proc initFullNode( else: dag.tail.slot - func getUntrustedBackfillSlot(): Slot = + proc getUntrustedBackfillSlot(): Slot = if clist.tail.isSome(): clist.tail.get().blck.slot else: - dag.tail.slot + getLocalWallSlot() + + func getUntrustedFrontfillSlot(): Slot = + getFirstSlotAtFinalizedEpoch() func getFrontfillSlot(): Slot = max(dag.frontfill.get(BlockId()).slot, dag.horizon) @@ -528,7 +531,7 @@ proc initFullNode( dag.cfg.DENEB_FORK_EPOCH, dag.cfg.MIN_EPOCHS_FOR_BLOB_SIDECARS_REQUESTS, SyncQueueKind.Backward, getLocalHeadSlot, getLocalWallSlot, getFirstSlotAtFinalizedEpoch, getUntrustedBackfillSlot, - getFrontfillSlot, isWithinWeakSubjectivityPeriod, + getUntrustedFrontfillSlot, isWithinWeakSubjectivityPeriod, clistPivotSlot, untrustedBlockVerifier, maxHeadAge = 0, shutdownEvent = node.shutdownEvent, flags = syncManagerFlags) diff --git a/beacon_chain/sync/sync_overseer.nim b/beacon_chain/sync/sync_overseer.nim index 738e8cb398..cbbab92849 100644 --- a/beacon_chain/sync/sync_overseer.nim +++ b/beacon_chain/sync/sync_overseer.nim @@ -1,5 +1,5 @@ # beacon_chain -# Copyright (c) 2018-2024 Status Research & Development GmbH +# Copyright (c) 2018-2025 Status Research & Development GmbH # Licensed and distributed under either of # * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT). # * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0). @@ -148,6 +148,22 @@ proc isWithinWeakSubjectivityPeriod( is_within_weak_subjectivity_period( dag.cfg, currentSlot, dag.headState, checkpoint) +proc getLastBlockRetentionPeriodSlot(overseer: SyncOverseerRef): Slot = + let + dag = overseer.consensusManager.dag + currentSlot = overseer.beaconClock.now().slotOrZero() + slotsCount = dag.cfg.MIN_EPOCHS_FOR_BLOCK_REQUESTS * SLOTS_PER_EPOCH + if currentSlot < slotsCount: + GENESIS_SLOT + else: + currentSlot - slotsCount + +proc isWithinBlockRetentionPeriod( + overseer: SyncOverseerRef, + slot: Slot +): bool = + slot >= overseer.getLastBlockRetentionPeriodSlot() + proc isUntrustedBackfillEmpty(clist: ChainListRef): bool = clist.tail.isNone() @@ -184,7 +200,7 @@ proc updatePerformance(overseer: SyncOverseerRef, startTick: Moment, # Update status string overseer.statusMsg = Opt.some( - timeleft.toTimeLeftString() & " (" & + "fill: " & timeleft.toTimeLeftString() & " (" & (done * 100).formatBiggestFloat(ffDecimal, 2) & "%) " & overseer.avgSpeed.formatBiggestFloat(ffDecimal, 4) & "slots/s (" & $dag.head.slot & ")") @@ -418,6 +434,15 @@ proc mainLoop*( clist = overseer.clist currentSlot = overseer.beaconClock.now().slotOrZero() + info "Sync overseer starting", + wall_slot = currentSlot, + dag_head_slot = dag.head.slot, + dag_finalized_head_slot = dag.finalizedHead.slot, + dag_horizon = dag.horizon(), + dag_backfill_slot = dag.backfill.slot, + untrusted_tail = shortLog(clist.tail), + untrusted_head = shortLog(clist.head) + if overseer.isWithinWeakSubjectivityPeriod(currentSlot): # Starting forward sync manager/monitor. overseer.forwardSync.start() @@ -433,10 +458,12 @@ proc mainLoop*( if not(isUntrustedBackfillEmpty(clist)): let headSlot = clist.head.get().slot - if not(overseer.isWithinWeakSubjectivityPeriod(headSlot)): + if not(overseer.isWithinBlockRetentionPeriod(headSlot)): # Light forward sync file is too old. - warn "Light client sync was started too long time ago", - current_slot = currentSlot, backfill_data_slot = headSlot + warn "Light forward sync was started too long time ago", + current_slot = currentSlot, + backfill_data_slot = headSlot, + retention_period_slot = overseer.getLastBlockRetentionPeriodSlot() if overseer.config.longRangeSync == LongRangeSyncMode.Lenient: # Starting forward sync manager/monitor only. @@ -451,6 +478,13 @@ proc mainLoop*( altair_start_slot = dag.cfg.ALTAIR_FORK_EPOCH.start_slot quit 1 + if overseer.isWithinBlockRetentionPeriod(dagHead.slot): + fatal "Current database head slot is not in the block retention " & + "period range", + head_slot = dagHead.slot, + retention_period_slot = overseer.getLastBlockRetentionPeriodSlot() + quit 1 + if isUntrustedBackfillEmpty(clist): overseer.untrustedInProgress = true @@ -458,6 +492,7 @@ proc mainLoop*( await overseer.initUntrustedSync() except CancelledError: return + # We need to update pivot slot to enable timeleft calculation. overseer.untrustedSync.updatePivot(overseer.clist.tail.get().slot) # Note: We should not start forward sync manager! @@ -486,6 +521,8 @@ proc mainLoop*( quit 1 overseer.untrustedInProgress = false + # Reset status bar + overseer.statusMsg = Opt.none(string) # When we finished state rebuilding process - we could start forward # SyncManager which could perform finish sync.