From 178883e1d0103ad9e9269b47d0c1dddeca6e6185 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert Date: Wed, 17 Jul 2024 17:41:26 +0200 Subject: [PATCH] Do not crash on block gap in `displaced_leaves_after_finalizing` (#4997) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After the merge of #4922 we saw failing zombienet tests with the following error: ``` 2024-07-09 10:30:09 Error applying finality to block (0xb9e1d3d9cb2047fe61667e28a0963e0634a7b29781895bc9ca40c898027b4c09, 56685): UnknownBlock: Header was not found in the database: 0x0000000000000000000000000000000000000000000000000000000000000000 2024-07-09 10:30:09 GRANDPA voter error: could not complete a round on disk: UnknownBlock: Header was not found in the database: 0x0000000000000000000000000000000000000000000000000000000000000000 ``` [Example](https://gitlab.parity.io/parity/mirrors/polkadot-sdk/-/jobs/6662262) The crashing situation is warp-sync related. After warp syncing, it can happen that there are gaps in block ancestry where we don't have the header. At the same time, the genesis hash is in the set of leaves. In `displaced_leaves_after_finalizing` we then iterate from the finalized block backwards until we hit an unknown block, crashing the node. This PR makes the detection of displaced branches resilient against unknown block in the finalized block chain. cc @nazar-pc (github won't let me request a review from you) --------- Co-authored-by: Bastian Köcher Co-authored-by: command-bot <> --- Cargo.lock | 2 +- prdoc/pr_4997.prdoc | 20 +++ substrate/client/db/src/lib.rs | 155 ++++++++++++++++++ substrate/primitives/blockchain/Cargo.toml | 2 +- .../primitives/blockchain/src/backend.rs | 62 +++++-- substrate/primitives/blockchain/src/lib.rs | 2 + 6 files changed, 228 insertions(+), 15 deletions(-) create mode 100644 prdoc/pr_4997.prdoc diff --git a/Cargo.lock b/Cargo.lock index ad75224fefdc4..aad15fe033d1c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19855,7 +19855,6 @@ name = "sp-blockchain" version = "28.0.0" dependencies = [ "futures", - "log", "parity-scale-codec", "parking_lot 0.12.3", "schnellru", @@ -19866,6 +19865,7 @@ dependencies = [ "sp-runtime", "sp-state-machine", "thiserror", + "tracing", ] [[package]] diff --git a/prdoc/pr_4997.prdoc b/prdoc/pr_4997.prdoc new file mode 100644 index 0000000000000..25620a7e63ea8 --- /dev/null +++ b/prdoc/pr_4997.prdoc @@ -0,0 +1,20 @@ +# Schema: Polkadot SDK PRDoc Schema (prdoc) v1.0.0 +# See doc at https://raw.githubusercontent.com/paritytech/polkadot-sdk/master/prdoc/schema_user.json + +title: Do not crash on block gap in displaced_leaves_after_finalizing + +doc: + - audience: + - Node Operator + - Node Dev + description: | + After recent changes, crashes where occuring when calculating displaced branches after a block was finalized. + The reason are block gaps in the finalized chain. When encountering unknown blocks, the node was panicking. + This PR introduces changes to tolerate unknown blocks. Leafs that are separated by a gap from the to-be-finalized + block are not marked as displaced. + +crates: +- name: sc-client-db + bump: none +- name: sp-blockchain + bump: patch diff --git a/substrate/client/db/src/lib.rs b/substrate/client/db/src/lib.rs index e95cd9e4ad5fd..acd165d916135 100644 --- a/substrate/client/db/src/lib.rs +++ b/substrate/client/db/src/lib.rs @@ -2613,6 +2613,35 @@ pub(crate) mod tests { Ok(header.hash()) } + pub fn insert_disconnected_header( + backend: &Backend, + number: u64, + parent_hash: H256, + extrinsics_root: H256, + best: bool, + ) -> H256 { + use sp_runtime::testing::Digest; + + let digest = Digest::default(); + let header = + Header { number, parent_hash, state_root: Default::default(), digest, extrinsics_root }; + + let mut op = backend.begin_operation().unwrap(); + + op.set_block_data( + header.clone(), + Some(vec![]), + None, + None, + if best { NewBlockState::Best } else { NewBlockState::Normal }, + ) + .unwrap(); + + backend.commit_operation(op).unwrap(); + + header.hash() + } + pub fn insert_header_no_head( backend: &Backend, number: u64, @@ -3112,6 +3141,123 @@ pub(crate) mod tests { } } + #[test] + fn displaced_leaves_after_finalizing_works_with_disconnect() { + // In this test we will create a situation that can typically happen after warp sync. + // The situation looks like this: + // g -> -> a3 -> a4 + // Basically there is a gap of unimported blocks at some point in the chain. + let backend = Backend::::new_test(1000, 100); + let blockchain = backend.blockchain(); + let genesis_number = 0; + let genesis_hash = + insert_header(&backend, genesis_number, Default::default(), None, Default::default()); + + let a3_number = 3; + let a3_hash = insert_disconnected_header( + &backend, + a3_number, + H256::from([200; 32]), + H256::from([1; 32]), + true, + ); + + let a4_number = 4; + let a4_hash = + insert_disconnected_header(&backend, a4_number, a3_hash, H256::from([2; 32]), true); + { + let displaced = + blockchain.displaced_leaves_after_finalizing(a3_hash, a3_number).unwrap(); + assert_eq!(blockchain.leaves().unwrap(), vec![a4_hash, genesis_hash]); + assert_eq!(displaced.displaced_leaves, vec![]); + assert_eq!(displaced.displaced_blocks, vec![]); + } + + { + let displaced = + blockchain.displaced_leaves_after_finalizing(a4_hash, a4_number).unwrap(); + assert_eq!(blockchain.leaves().unwrap(), vec![a4_hash, genesis_hash]); + assert_eq!(displaced.displaced_leaves, vec![]); + assert_eq!(displaced.displaced_blocks, vec![]); + } + + // Import block a1 which has the genesis block as parent. + // g -> a1 -> -> a3(f) -> a4 + let a1_number = 1; + let a1_hash = insert_disconnected_header( + &backend, + a1_number, + genesis_hash, + H256::from([123; 32]), + false, + ); + { + let displaced = + blockchain.displaced_leaves_after_finalizing(a3_hash, a3_number).unwrap(); + assert_eq!(blockchain.leaves().unwrap(), vec![a4_hash, a1_hash]); + assert_eq!(displaced.displaced_leaves, vec![]); + assert_eq!(displaced.displaced_blocks, vec![]); + } + + // Import block b1 which has the genesis block as parent. + // g -> a1 -> -> a3(f) -> a4 + // \-> b1 + let b1_number = 1; + let b1_hash = insert_disconnected_header( + &backend, + b1_number, + genesis_hash, + H256::from([124; 32]), + false, + ); + { + let displaced = + blockchain.displaced_leaves_after_finalizing(a3_hash, a3_number).unwrap(); + assert_eq!(blockchain.leaves().unwrap(), vec![a4_hash, a1_hash, b1_hash]); + assert_eq!(displaced.displaced_leaves, vec![]); + assert_eq!(displaced.displaced_blocks, vec![]); + } + + // If branch of b blocks is higher in number than a branch, we + // should still not prune disconnected leafs. + // g -> a1 -> -> a3(f) -> a4 + // \-> b1 -> b2 ----------> b3 ----> b4 -> b5 + let b2_number = 2; + let b2_hash = + insert_disconnected_header(&backend, b2_number, b1_hash, H256::from([40; 32]), false); + let b3_number = 3; + let b3_hash = + insert_disconnected_header(&backend, b3_number, b2_hash, H256::from([41; 32]), false); + let b4_number = 4; + let b4_hash = + insert_disconnected_header(&backend, b4_number, b3_hash, H256::from([42; 32]), false); + let b5_number = 5; + let b5_hash = + insert_disconnected_header(&backend, b5_number, b4_hash, H256::from([43; 32]), false); + { + let displaced = + blockchain.displaced_leaves_after_finalizing(a3_hash, a3_number).unwrap(); + assert_eq!(blockchain.leaves().unwrap(), vec![b5_hash, a4_hash, a1_hash]); + assert_eq!(displaced.displaced_leaves, vec![]); + assert_eq!(displaced.displaced_blocks, vec![]); + } + + // Even though there is a disconnect, diplace should still detect + // branches above the block gap. + // /-> c4 + // g -> a1 -> -> a3 -> a4(f) + // \-> b1 -> b2 ----------> b3 -> b4 -> b5 + let c4_number = 4; + let c4_hash = + insert_disconnected_header(&backend, c4_number, a3_hash, H256::from([44; 32]), false); + { + let displaced = + blockchain.displaced_leaves_after_finalizing(a4_hash, a4_number).unwrap(); + assert_eq!(blockchain.leaves().unwrap(), vec![b5_hash, a4_hash, c4_hash, a1_hash]); + assert_eq!(displaced.displaced_leaves, vec![(c4_number, c4_hash)]); + assert_eq!(displaced.displaced_blocks, vec![c4_hash]); + } + } #[test] fn displaced_leaves_after_finalizing_works() { let backend = Backend::::new_test(1000, 100); @@ -3156,6 +3302,15 @@ pub(crate) mod tests { assert_eq!(displaced_a3.displaced_leaves, vec![]); assert_eq!(displaced_a3.displaced_blocks, vec![]); } + { + // Finalized block is above leaves and not imported yet. + // We will not be able to make a connection, + // nothing can be marked as displaced. + let displaced = + blockchain.displaced_leaves_after_finalizing(H256::from([57; 32]), 10).unwrap(); + assert_eq!(displaced.displaced_leaves, vec![]); + assert_eq!(displaced.displaced_blocks, vec![]); + } // fork from genesis: 2 prong. let b1_number = 1; diff --git a/substrate/primitives/blockchain/Cargo.toml b/substrate/primitives/blockchain/Cargo.toml index aedd720612c33..bd0daaf63c055 100644 --- a/substrate/primitives/blockchain/Cargo.toml +++ b/substrate/primitives/blockchain/Cargo.toml @@ -19,7 +19,6 @@ targets = ["x86_64-unknown-linux-gnu"] [dependencies] codec = { features = ["derive"], workspace = true } futures = { workspace = true } -log = { workspace = true, default-features = true } parking_lot = { workspace = true, default-features = true } schnellru = { workspace = true } thiserror = { workspace = true } @@ -29,3 +28,4 @@ sp-consensus = { workspace = true, default-features = true } sp-database = { workspace = true, default-features = true } sp-runtime = { workspace = true, default-features = true } sp-state-machine = { workspace = true, default-features = true } +tracing = { workspace = true, default-features = true } diff --git a/substrate/primitives/blockchain/src/backend.rs b/substrate/primitives/blockchain/src/backend.rs index a928217d58854..2accd4dad12c5 100644 --- a/substrate/primitives/blockchain/src/backend.rs +++ b/substrate/primitives/blockchain/src/backend.rs @@ -17,7 +17,6 @@ //! Substrate blockchain trait -use log::warn; use parking_lot::RwLock; use sp_runtime::{ generic::BlockId, @@ -25,6 +24,7 @@ use sp_runtime::{ Justifications, }; use std::collections::{btree_set::BTreeSet, HashMap, VecDeque}; +use tracing::{debug, warn}; use crate::{ error::{Error, Result}, @@ -228,6 +228,7 @@ pub trait Backend: // // FIXME #1558 only issue this warning when not on a dead fork warn!( + target: crate::LOG_TARGET, "Block {:?} exists in chain but not found when following all leaves backwards", base_hash, ); @@ -254,16 +255,35 @@ pub trait Backend: ) -> std::result::Result, Error> { let leaves = self.leaves()?; + debug!( + target: crate::LOG_TARGET, + ?leaves, + %finalized_block_hash, + ?finalized_block_number, + "Checking for displaced leaves after finalization." + ); + // If we have only one leaf there are no forks, and we can return early. if finalized_block_number == Zero::zero() || leaves.len() == 1 { return Ok(DisplacedLeavesAfterFinalization::default()) } - // Store hashes of finalized blocks for quick checking later, the last block if the + // Store hashes of finalized blocks for quick checking later, the last block is the // finalized one let mut finalized_chain = VecDeque::new(); - finalized_chain - .push_front(MinimalBlockMetadata::from(&self.header_metadata(finalized_block_hash)?)); + let current_finalized = match self.header_metadata(finalized_block_hash) { + Ok(metadata) => metadata, + Err(Error::UnknownBlock(_)) => { + debug!( + target: crate::LOG_TARGET, + hash = ?finalized_block_hash, + "Tried to fetch unknown block, block ancestry has gaps." + ); + return Ok(DisplacedLeavesAfterFinalization::default()); + }, + Err(e) => Err(e)?, + }; + finalized_chain.push_front(MinimalBlockMetadata::from(¤t_finalized)); // Local cache is a performance optimization in case of finalized block deep below the // tip of the chain with a lot of leaves above finalized block @@ -273,6 +293,7 @@ pub trait Backend: displaced_leaves: Vec::with_capacity(leaves.len()), displaced_blocks: Vec::with_capacity(leaves.len()), }; + let mut displaced_blocks_candidates = Vec::new(); for leaf_hash in leaves { @@ -306,11 +327,11 @@ pub trait Backend: continue; } - // Otherwise the whole leaf branch needs to be pruned, track it all the way to the - // point of branching from the finalized chain - result.displaced_leaves.push((leaf_number, leaf_hash)); - result.displaced_blocks.extend(displaced_blocks_candidates.drain(..)); - result.displaced_blocks.push(current_header_metadata.hash); + // We reuse `displaced_blocks_candidates` to store the current metadata. + // This block is not displaced if there is a gap in the ancestry. We + // check for this gap later. + displaced_blocks_candidates.push(current_header_metadata.hash); + // Collect the rest of the displaced blocks of leaf branch for distance_from_finalized in 1_u32.. { // Find block at `distance_from_finalized` from finalized block @@ -318,9 +339,22 @@ pub trait Backend: match finalized_chain.iter().rev().nth(distance_from_finalized as usize) { Some(header) => (header.number, header.hash), None => { - let metadata = MinimalBlockMetadata::from(&self.header_metadata( - finalized_chain.front().expect("Not empty; qed").parent, - )?); + let to_fetch = finalized_chain.front().expect("Not empty; qed"); + let metadata = match self.header_metadata(to_fetch.parent) { + Ok(metadata) => metadata, + Err(Error::UnknownBlock(_)) => { + debug!( + target: crate::LOG_TARGET, + distance_from_finalized, + hash = ?to_fetch.parent, + number = ?to_fetch.number, + "Tried to fetch unknown block, block ancestry has gaps." + ); + break; + }, + Err(e) => Err(e)?, + }; + let metadata = MinimalBlockMetadata::from(&metadata); let result = (metadata.number, metadata.hash); finalized_chain.push_front(metadata); result @@ -336,11 +370,13 @@ pub trait Backend: let parent_hash = current_header_metadata.parent; if finalized_chain_block_hash == parent_hash { // Reached finalized chain, nothing left to do + result.displaced_blocks.extend(displaced_blocks_candidates.drain(..)); + result.displaced_leaves.push((leaf_number, leaf_hash)); break; } // Store displaced block and look deeper for block on finalized chain - result.displaced_blocks.push(parent_hash); + displaced_blocks_candidates.push(parent_hash); current_header_metadata = MinimalBlockMetadata::from(&self.header_metadata(parent_hash)?); } diff --git a/substrate/primitives/blockchain/src/lib.rs b/substrate/primitives/blockchain/src/lib.rs index eabbbcf50d9f2..305b7f6afec19 100644 --- a/substrate/primitives/blockchain/src/lib.rs +++ b/substrate/primitives/blockchain/src/lib.rs @@ -24,3 +24,5 @@ mod header_metadata; pub use backend::*; pub use error::*; pub use header_metadata::*; + +const LOG_TARGET: &str = "db::blockchain";