Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ledger-tool: verify: add --record-slots and --verify-slots #34246

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 147 additions & 2 deletions ledger-tool/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ use {
solana_ledger::{
blockstore::{create_new_ledger, Blockstore},
blockstore_options::{AccessType, LedgerColumnOptions},
blockstore_processor::ProcessSlotCallback,
use_snapshot_archives_at_startup,
},
solana_measure::{measure, measure::Measure},
Expand Down Expand Up @@ -88,7 +89,7 @@ use {
str::FromStr,
sync::{
atomic::{AtomicBool, Ordering},
Arc, RwLock,
Arc, Mutex, RwLock,
},
},
};
Expand Down Expand Up @@ -1060,6 +1061,28 @@ fn main() {
information that went into computing the completed bank's bank hash. \
The file will be written within <LEDGER_DIR>/bank_hash_details/",
),
)
.arg(
Arg::with_name("record_slots")
.long("record-slots")
.default_value("slots.json")
.value_name("FILENAME")
.help("Record slots to a file"),
)
.arg(
Arg::with_name("verify_slots")
.long("verify-slots")
.default_value("slots.json")
.value_name("FILENAME")
.help("Verify slots match contents of file"),
)
.arg(
Arg::with_name("record_slots_config")
.long("record-slots-config")
.default_value("hash-only")
.possible_values(&["hash-only", "accounts"])
.requires("record_slots")
.help("In the slot recording, include bank details or not"),
),
)
.subcommand(
Expand Down Expand Up @@ -1621,7 +1644,114 @@ fn main() {
},
);

let process_options = parse_process_options(&ledger_path, arg_matches);
let mut process_options = parse_process_options(&ledger_path, arg_matches);

// .default_value() does not work with .conflicts_with() in clap 2.33
// .conflicts_with("verify_slots")
// https://github.com/clap-rs/clap/issues/1605#issuecomment-722326915
// So open-code the conflicts_with() here
if arg_matches.occurrences_of("record_slots") > 0
&& arg_matches.occurrences_of("verify_slots") > 0
{
eprintln!(
"error: The argument '--verify-slots <FILENAME>' cannot be used with '--record-slots <FILENAME>'"
);
exit(1);
}

let (slot_callback, record_slots_file, recorded_slots) = if arg_matches
.occurrences_of("record_slots")
> 0
{
let filename = Path::new(arg_matches.value_of_os("record_slots").unwrap());

let file = File::create(filename).unwrap_or_else(|err| {
eprintln!("Unable to write to file: {}: {:#}", filename.display(), err);
exit(1);
});

let include_bank =
match arg_matches.value_of("record_slots_config").unwrap() {
"hash-only" => false,
"accounts" => true,
_ => unreachable!(),
};

let slot_hashes = Arc::new(Mutex::new(Vec::new()));

let slot_callback = Arc::new({
let slots = Arc::clone(&slot_hashes);
move |bank: &Bank| {
let slot_details = if include_bank {
bank_hash_details::BankHashSlotDetails::try_from(bank).unwrap()
} else {
bank_hash_details::BankHashSlotDetails {
slot: bank.slot(),
bank_hash: bank.hash().to_string(),
..Default::default()
}
};

slots.lock().unwrap().push(slot_details);
}
});

(
Some(slot_callback as ProcessSlotCallback),
Some(file),
Some(slot_hashes),
)
} else if arg_matches.occurrences_of("verify_slots") > 0 {
let filename = Path::new(arg_matches.value_of_os("verify_slots").unwrap());

let file = File::open(filename).unwrap_or_else(|err| {
eprintln!("Unable to read file: {}: {err:#}", filename.display());
exit(1);
});

let reader = std::io::BufReader::new(file);

let details: bank_hash_details::BankHashDetails =
serde_json::from_reader(reader).unwrap_or_else(|err| {
eprintln!("Error loading slots file: {err:#}");
exit(1);
});

let slots = Arc::new(Mutex::new(details.bank_hash_details));

let slot_callback = Arc::new(move |bank: &Bank| {
if slots.lock().unwrap().is_empty() {
error!(
"Expected slot: not found got slot: {} hash: {}",
bank.slot(),
bank.hash()
);
} else {
let bank_hash_details::BankHashSlotDetails {
slot: expected_slot,
bank_hash: expected_hash,
..
} = slots.lock().unwrap().remove(0);
if bank.slot() != expected_slot
|| bank.hash().to_string() != expected_hash
{
error!("Expected slot: {expected_slot} hash: {expected_hash} got slot: {} hash: {}",
bank.slot(), bank.hash());
} else {
info!(
"Expected slot: {expected_slot} hash: {expected_hash} correct"
);
}
}
});

(Some(slot_callback as ProcessSlotCallback), None, None)
} else {
(None, None, None)
};

process_options.slot_callback = slot_callback;

let print_accounts_stats = arg_matches.is_present("print_accounts_stats");
let write_bank_file = arg_matches.is_present("write_bank_file");
let genesis_config = open_genesis_config_by(&ledger_path, arg_matches);
Expand Down Expand Up @@ -1653,6 +1783,21 @@ fn main() {
})
.ok();
}

if let Some(recorded_slots_file) = record_slots_file {
if let Ok(recorded_slots) = recorded_slots.clone().unwrap().lock() {
let bank_hashes =
bank_hash_details::BankHashDetails::new(recorded_slots.to_vec());

// writing the json file ends up with a syscall for each number, comma, indentation etc.
// use BufWriter to speed things up

let writer = std::io::BufWriter::new(recorded_slots_file);

serde_json::to_writer_pretty(writer, &bank_hashes).unwrap();
Comment on lines +1792 to +1797
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is duplicate code with stuff we have in bank_hash_details.rs, would be nice to single source that

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fn write_bank_hash_details_file() writes a single bank, whereas here we are writing a Vec<BankHashDetails>. Also, fn write_bank_hash_details_file() is used from fn dump_then_repair_correct_slots() as well.

I am not sure how I can consolidate the two.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm yeah, maybe not as easy as I thought. I'd say we can leave it for now

The long term vision is implementing something like BankHashDetailsStreamer. This object could hold the serializer, and then dump out details bank-by-bank. Will definitely help out if someone runs the command with "accounts" mode on a large number of slots.

You could specify the config at the start, the object could retain the config. Then, when you call stream_bank() or whatever we decide to name it, the streamer extracts appropriate details (just the bank hash OR all the hashes + accounts).

To the end user, this is mostly invisible tho. So, like I said, I'm happy to push the PR as-is and then potentially go refactor after the fact

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, that makes sense. For virtual machine debugging it would be super-useful to have the transactions in the file too. Let's start with this and go from there.

}
}

exit_signal.store(true, Ordering::Relaxed);
system_monitor_service.join().unwrap();
}
Expand Down
11 changes: 9 additions & 2 deletions ledger/src/blockstore_processor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -676,15 +676,17 @@ pub enum BlockstoreProcessorError {
RootBankWithMismatchedCapitalization(Slot),
}

/// Callback for accessing bank state while processing the blockstore
pub type ProcessCallback = Arc<dyn Fn(&Bank) + Sync + Send>;
/// Callback for accessing bank state after each slot is confirmed while
/// processing the blockstore
pub type ProcessSlotCallback = Arc<dyn Fn(&Bank) + Sync + Send>;

#[derive(Default, Clone)]
pub struct ProcessOptions {
/// Run PoH, transaction signature and other transaction verifications on the entries.
pub run_verification: bool,
pub full_leader_cache: bool,
pub halt_at_slot: Option<Slot>,
pub slot_callback: Option<ProcessSlotCallback>,
pub new_hard_forks: Option<Vec<Slot>>,
pub debug_keys: Option<Arc<HashSet<Pubkey>>>,
pub account_indexes: AccountSecondaryIndexes,
Expand Down Expand Up @@ -1810,6 +1812,11 @@ fn process_single_slot(
result?
}
bank.freeze(); // all banks handled by this routine are created from complete slots

if let Some(slot_callback) = &opts.slot_callback {
slot_callback(bank);
}

if blockstore.is_primary_access() {
blockstore.insert_bank_hash(bank.slot(), bank.hash(), false);
}
Expand Down
35 changes: 29 additions & 6 deletions runtime/src/bank/bank_hash_details.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use {
};

#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub(crate) struct BankHashDetails {
pub struct BankHashDetails {
/// The client version
pub version: String,
/// The encoding format for account data buffers
Expand Down Expand Up @@ -66,17 +66,35 @@ impl BankHashDetails {
}

/// The components that go into a bank hash calculation for a single bank/slot.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub(crate) struct BankHashSlotDetails {
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize, Default)]
pub struct BankHashSlotDetails {
pub slot: Slot,
pub bank_hash: String,
#[serde(skip_serializing_if = "String::is_empty")]
#[serde(default)]
pub parent_bank_hash: String,
#[serde(skip_serializing_if = "String::is_empty")]
#[serde(default)]
pub accounts_delta_hash: String,
#[serde(skip_serializing_if = "u64_is_zero")]
#[serde(default)]
pub signature_count: u64,
#[serde(skip_serializing_if = "String::is_empty")]
#[serde(default)]
pub last_blockhash: String,
#[serde(skip_serializing_if = "bankhashaccounts_is_empty")]
#[serde(default)]
pub accounts: BankHashAccounts,
}

/// Predicate named by the `skip_serializing_if` attribute on `signature_count`.
///
/// Returns `true` exactly when the referenced value is `0`.
fn u64_is_zero(val: &u64) -> bool {
    matches!(*val, 0)
}

fn bankhashaccounts_is_empty(accounts: &BankHashAccounts) -> bool {
accounts.accounts.is_empty()
}

impl BankHashSlotDetails {
pub fn new(
slot: Slot,
Expand Down Expand Up @@ -141,8 +159,8 @@ impl TryFrom<&Bank> for BankHashSlotDetails {

/// Wrapper around a Vec<_> to facilitate custom Serialize/Deserialize trait
/// implementations.
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) struct BankHashAccounts {
#[derive(Clone, Debug, Eq, PartialEq, Default)]
pub struct BankHashAccounts {
pub accounts: Vec<PubkeyHashAccount>,
}

Expand Down Expand Up @@ -257,7 +275,12 @@ pub fn write_bank_hash_details_file(bank: &Bank) -> std::result::Result<(), Stri
_ = std::fs::create_dir_all(parent_dir);
let file = std::fs::File::create(&path)
.map_err(|err| format!("Unable to create file at {}: {err}", path.display()))?;
serde_json::to_writer_pretty(file, &details)

// writing the json file ends up with a syscall for each number, comma, indentation etc.
// use BufWriter to speed things up
let writer = std::io::BufWriter::new(file);

serde_json::to_writer_pretty(writer, &details)
.map_err(|err| format!("Unable to write file at {}: {err}", path.display()))?;
}
Ok(())
Expand Down
Loading