Skip to content

Commit

Permalink
generalize support for bin range of accounts hash tool (#2649)
Browse files Browse the repository at this point in the history
* generalize support for bin range of accounts hash tool

* fix comments

* clippy

* sort deps

* fmtwq

* pr feedbacks

* Update accounts-db/accounts-hash-cache-tool/src/main.rs

Co-authored-by: Brooks <[email protected]>

* Update accounts-db/accounts-hash-cache-tool/src/main.rs

Co-authored-by: Brooks <[email protected]>

* Update accounts-db/accounts-hash-cache-tool/src/main.rs

Co-authored-by: Brooks <[email protected]>

* pr feedback: rename variable

---------

Co-authored-by: HaoranYi <[email protected]>
Co-authored-by: Brooks <[email protected]>
  • Loading branch information
3 people authored Aug 20, 2024
1 parent 4daf93c commit 951bb04
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 36 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions accounts-db/accounts-hash-cache-tool/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ clap = { workspace = true }
memmap2 = { workspace = true }
rayon = { workspace = true }
solana-accounts-db = { workspace = true }
solana-clap-utils = { workspace = true }
solana-program = { workspace = true }
solana-version = { workspace = true }

Expand Down
71 changes: 35 additions & 36 deletions accounts-db/accounts-hash-cache-tool/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use {
pubkey_bins::PubkeyBinCalculator24, CacheHashDataFileEntry, CacheHashDataFileHeader,
ParsedCacheHashDataFilename,
},
solana_clap_utils::input_parsers::values_of,
solana_program::pubkey::Pubkey,
std::{
cmp::{self, Ordering},
Expand All @@ -20,6 +21,7 @@ use {
iter,
mem::size_of,
num::Saturating,
ops::Range,
path::{Path, PathBuf},
str,
sync::RwLock,
Expand Down Expand Up @@ -146,17 +148,24 @@ fn main() {
),
)
.arg(
Arg::with_name("bin_of_interest")
.long("bin-of-interest")
Arg::with_name("bins_of_interest")
.long("bins-of-interest")
.takes_value(true)
.value_name("INDEX")
.help("Specifies a single bin to diff")
.value_name("BINS")
.min_values(1)
.max_values(2)
.value_delimiter("-")
.require_delimiter(true)
.multiple(false)
.help("Specifies bins to diff")
.long_help(
"Specifies a single bin to diff. \
"Specifies bins to diff. \
When diffing large state that does not fit in memory, \
it may be neccessary to diff a subset at a time. \
Use this arg to limit the state to a single bin. \
The INDEX must be less than --bins."
it may be necessary to diff a subset at a time. \
Use this arg to limit the state to bins of interest. \
This arg takes either a single bin or a bin range. \
A bin range is specified as \"start-end\", where \
\"start\" is inclusive, and \"end\" is exclusive."
),
),
),
Expand Down Expand Up @@ -225,22 +234,20 @@ fn cmd_diff_state(
let path1 = value_t_or_exit!(subcommand_matches, "path1", String);
let path2 = value_t_or_exit!(subcommand_matches, "path2", String);
let num_bins = value_t_or_exit!(subcommand_matches, "bins", usize);
let bin_of_interest =
if let Some(bin_of_interest) = subcommand_matches.value_of("bin_of_interest") {
let bin_of_interest = bin_of_interest
.parse()
.map_err(|err| format!("argument 'bin-of-interest' is not a valid value: {err}"))?;
if bin_of_interest >= num_bins {
return Err(format!(
"argument 'bin-of-interest' must be less than 'bins', \
bins: {num_bins}, bin-of-interest: {bin_of_interest}",
));

let bins_of_interest =
if let Some(bins) = values_of::<usize>(subcommand_matches, "bins_of_interest") {
match bins.len() {
1 => bins[0]..bins[0].saturating_add(1),
2 => bins[0]..bins[1],
_ => {
unreachable!("invalid number of values given to bins_of_interest.")
}
}
Some(bin_of_interest)
} else {
None
0..usize::MAX
};
do_diff_state(path1, path2, num_bins, bin_of_interest)
do_diff_state(path1, path2, num_bins, bins_of_interest)
}

fn do_inspect(file: impl AsRef<Path>, force: bool) -> Result<(), String> {
Expand Down Expand Up @@ -510,7 +517,7 @@ fn do_diff_state(
dir1: impl AsRef<Path>,
dir2: impl AsRef<Path>,
num_bins: usize,
bin_of_interest: Option<usize>,
bins_of_interest: Range<usize>,
) -> Result<(), String> {
let extract = |dir: &Path| -> Result<_, String> {
let files =
Expand All @@ -521,7 +528,7 @@ fn do_diff_state(
} = extract_binned_latest_entries_in(
files.iter().map(|file| &file.path),
num_bins,
bin_of_interest,
&bins_of_interest,
)
.map_err(|err| format!("failed to extract entries: {err}"))?;
let num_accounts: usize = latest_entries.iter().map(|bin| bin.len()).sum();
Expand Down Expand Up @@ -699,7 +706,7 @@ fn extract_latest_entries_in(file: impl AsRef<Path>) -> Result<LatestEntriesInfo
let BinnedLatestEntriesInfo {
latest_entries,
capitalization,
} = extract_binned_latest_entries_in(iter::once(file), NUM_BINS, None)?;
} = extract_binned_latest_entries_in(iter::once(file), NUM_BINS, &(0..usize::MAX))?;
assert_eq!(latest_entries.len(), NUM_BINS);
let mut latest_entries = Vec::from(latest_entries);
let latest_entries = latest_entries.pop().unwrap();
Expand All @@ -715,20 +722,15 @@ fn extract_latest_entries_in(file: impl AsRef<Path>) -> Result<LatestEntriesInfo
/// If there are multiple entries for a pubkey, only the latest is returned.
///
/// - `num_bins` specifies the number of bins to split the entries into.
/// - `bin_of_interest`, if Some, only returns entries from that bin.
/// Otherwise, returns all entries. Must be less than `num_bins`.
/// - `bins_of_interest` specifies the bin_range in which the entries should be returned.
///
/// Note: `files` must be sorted in ascending order, as insertion order is
/// relied on to guarantee the latest entry is returned.
fn extract_binned_latest_entries_in(
files: impl IntoIterator<Item = impl AsRef<Path>>,
num_bins: usize,
bin_of_interest: Option<usize>,
bins_of_interest: &Range<usize>,
) -> Result<BinnedLatestEntriesInfo, String> {
if let Some(bin_of_interest) = bin_of_interest {
assert!(bin_of_interest < num_bins);
}

let binner = PubkeyBinCalculator24::new(num_bins);
let mut entries: Box<_> = iter::repeat_with(HashMap::default).take(num_bins).collect();
let mut capitalization = Saturating(0);
Expand All @@ -744,11 +746,8 @@ fn extract_binned_latest_entries_in(

let num_entries = scan_mmap(&mmap, |entry| {
let bin = binner.bin_from_pubkey(&entry.pubkey);
if let Some(bin_of_interest) = bin_of_interest {
// Is this the bin of interest? If not, skip it.
if bin != bin_of_interest {
return;
}
if !bins_of_interest.contains(&bin) {
return;
}

capitalization += entry.lamports;
Expand Down

0 comments on commit 951bb04

Please sign in to comment.