Adds bench for scan_pubkeys()
brooksprumo committed Jul 11, 2024
1 parent 6d30530 commit 6d05c36
Showing 2 changed files with 130 additions and 5 deletions.
132 changes: 128 additions & 4 deletions accounts-db/benches/bench_accounts_file.rs
@@ -1,14 +1,25 @@
#![allow(clippy::arithmetic_side_effects)]
use {
criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion, Throughput},
rand::{distributions::WeightedIndex, prelude::*, SeedableRng as _},
rand_chacha::ChaChaRng,
solana_accounts_db::{
append_vec::{self, AppendVec},
tiered_storage::hot::HotStorageWriter,
accounts_file::StorageAccess,
append_vec::{self, AppendVec, SCAN_BUFFER_SIZE_WITHOUT_DATA},
tiered_storage::{
file::TieredReadableFile,
hot::{HotStorageReader, HotStorageWriter},
},
},
solana_sdk::{
account::AccountSharedData, clock::Slot, pubkey::Pubkey,
account::{AccountSharedData, ReadableAccount},
clock::Slot,
pubkey::Pubkey,
rent::Rent,
rent_collector::RENT_EXEMPT_RENT_EPOCH,
system_instruction::MAX_PERMITTED_DATA_LENGTH,
},
std::{iter, mem::ManuallyDrop},
};

const ACCOUNTS_COUNTS: [usize; 4] = [
@@ -87,5 +98,118 @@ fn bench_write_accounts_file(c: &mut Criterion) {
}
}

criterion_group!(benches, bench_write_accounts_file);
fn bench_scan_pubkeys(c: &mut Criterion) {
let mut group = c.benchmark_group("scan_pubkeys");
let temp_dir = tempfile::tempdir().unwrap();

// distribution of account data sizes to use when creating accounts
// 3% of accounts have no data
// 75% of accounts are 165 bytes (a token account)
// 20% of accounts are 200 bytes (a stake account)
// 1% of accounts are 256 kibibytes (pathological case for the scan buffer)
// 1% of accounts are 10 mebibytes (the max size for an account)
let data_sizes = [
0,
165,
200,
SCAN_BUFFER_SIZE_WITHOUT_DATA,
MAX_PERMITTED_DATA_LENGTH as usize,
];
let weights = [3, 75, 20, 1, 1];
let distribution = WeightedIndex::new(&weights).unwrap();

let rent = Rent::default();
let rent_minimum_balances: Vec<_> = data_sizes
.iter()
.map(|data_size| rent.minimum_balance(*data_size))
.collect();

for accounts_count in ACCOUNTS_COUNTS {
group.throughput(Throughput::Elements(accounts_count as u64));
let mut rng = ChaChaRng::seed_from_u64(accounts_count as u64);

let pubkeys: Vec<_> = iter::repeat_with(Pubkey::new_unique)
.take(accounts_count)
.collect();
let accounts: Vec<_> = iter::repeat_with(|| {
let index = distribution.sample(&mut rng);
AccountSharedData::new_rent_epoch(
rent_minimum_balances[index],
data_sizes[index],
&Pubkey::default(),
RENT_EXEMPT_RENT_EPOCH,
)
})
.take(pubkeys.len())
.collect();
let storable_accounts: Vec<_> = iter::zip(&pubkeys, &accounts).collect();

// create an append vec file
let append_vec_path = temp_dir.path().join(format!("append_vec_{accounts_count}"));
_ = std::fs::remove_file(&append_vec_path);
let file_size = accounts
.iter()
.map(|account| append_vec::aligned_stored_size(account.data().len()))
.sum();
let append_vec = AppendVec::new(append_vec_path, true, file_size);
let stored_accounts_info = append_vec
.append_accounts(&(Slot::MAX, storable_accounts.as_slice()), 0)
.unwrap();
assert_eq!(stored_accounts_info.offsets.len(), accounts_count);
append_vec.flush().unwrap();
// Open append vecs for reading here, outside of the bench function, so we don't open lots
// of file handles and run out/crash. We also need to *not* remove the backing file in
// these new append vecs because that would cause double-free (or triple-free here).
// Wrap the append vecs in ManuallyDrop to *not* remove the backing file on drop.
let append_vec_mmap = ManuallyDrop::new(
AppendVec::new_from_file(append_vec.path(), append_vec.len(), StorageAccess::Mmap)
.unwrap()
.0,
);
let append_vec_file = ManuallyDrop::new(
AppendVec::new_from_file(append_vec.path(), append_vec.len(), StorageAccess::File)
.unwrap()
.0,
);

// create a hot storage file
let hot_storage_path = temp_dir
.path()
.join(format!("hot_storage_{accounts_count}"));
_ = std::fs::remove_file(&hot_storage_path);
let mut hot_storage_writer = HotStorageWriter::new(&hot_storage_path).unwrap();
let stored_accounts_info = hot_storage_writer
.write_accounts(&(Slot::MAX, storable_accounts.as_slice()), 0)
.unwrap();
assert_eq!(stored_accounts_info.offsets.len(), accounts_count);
hot_storage_writer.flush().unwrap();
// Similar to the append vec case above, open the hot storage for reading here.
let hot_storage_file = TieredReadableFile::new(&hot_storage_path).unwrap();
let hot_storage_reader = HotStorageReader::new(hot_storage_file).unwrap();

group.bench_function(BenchmarkId::new("append_vec_mmap", accounts_count), |b| {
b.iter(|| {
let mut count = 0;
append_vec_mmap.scan_pubkeys(|_| count += 1);
assert_eq!(count, accounts_count);
});
});
group.bench_function(BenchmarkId::new("append_vec_file", accounts_count), |b| {
b.iter(|| {
let mut count = 0;
append_vec_file.scan_pubkeys(|_| count += 1);
assert_eq!(count, accounts_count);
});
});
group.bench_function(BenchmarkId::new("hot_storage", accounts_count), |b| {
b.iter(|| {
let mut count = 0;
hot_storage_reader.scan_pubkeys(|_| count += 1).unwrap();
assert_eq!(count, accounts_count);
});
});
}
}

criterion_group!(benches, bench_write_accounts_file, bench_scan_pubkeys);
criterion_main!(benches);
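
The ManuallyDrop comment inside bench_scan_pubkeys above leans on a standard-library guarantee: std::mem::ManuallyDrop suppresses a value's Drop impl, so the extra AppendVec handles never delete the backing file they share with the original. A minimal, self-contained sketch of that pattern (NoisyDrop is a hypothetical stand-in for AppendVec, not part of this commit):

use std::mem::ManuallyDrop;

// Stand-in for a type whose Drop impl would remove a backing file.
struct NoisyDrop(&'static str);

impl Drop for NoisyDrop {
    fn drop(&mut self) {
        println!("dropping {}", self.0); // AppendVec would unlink its file here
    }
}

fn main() {
    let _plain = NoisyDrop("plain"); // destructor runs at end of scope
    let _wrapped = ManuallyDrop::new(NoisyDrop("wrapped")); // destructor never runs
    // Prints only "dropping plain": the wrapped value is leaked intentionally,
    // which is why the bench can open the same file three times without every
    // handle trying to remove the same path on drop.
}
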
3 changes: 2 additions & 1 deletion accounts-db/src/append_vec.rs
@@ -297,6 +297,7 @@ const fn page_align(size: u64) -> u64 {
/// be able to hold about half of the accounts, so there would not be many syscalls needed to scan
/// the file. Since we also expect some larger accounts, this will also avoid reading/copying
/// large account data. This should be a decent starting value, and can be modified over time.
#[cfg_attr(feature = "dev-context-only-utils", qualifier_attr::qualifiers(pub))]
const SCAN_BUFFER_SIZE_WITHOUT_DATA: usize = 1 << 18;

lazy_static! {
@@ -1049,7 +1050,7 @@ impl AppendVec {
/// `data` is completely ignored, for example.
/// Also, no references have to be maintained/returned from an iterator function.
/// This fn can operate on a batch of data at once.
pub(crate) fn scan_pubkeys(&self, mut callback: impl FnMut(&Pubkey)) {
pub fn scan_pubkeys(&self, mut callback: impl FnMut(&Pubkey)) {
let mut offset = 0;
match &self.backing {
AppendVecFileBacking::Mmap(Mmap { mmap, .. }) => {
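
For a sense of scale on SCAN_BUFFER_SIZE_WITHOUT_DATA above: 1 << 18 is 262,144 bytes (256 KiB). The rough sizing sketch below assumes about 136 bytes of per-account stored metadata in an AppendVec; that overhead figure is an assumption for illustration, not something stated in this diff.

// Illustrative arithmetic only; the metadata overhead is an assumed value.
const SCAN_BUFFER_SIZE_WITHOUT_DATA: usize = 1 << 18; // 262_144 bytes = 256 KiB
const ASSUMED_STORED_META_OVERHEAD: usize = 136; // assumed per-account metadata bytes
const TOKEN_ACCOUNT_DATA_LEN: usize = 165; // the common case in the bench's distribution

fn main() {
    let per_entry = ASSUMED_STORED_META_OVERHEAD + TOKEN_ACCOUNT_DATA_LEN;
    let entries_per_read = SCAN_BUFFER_SIZE_WITHOUT_DATA / per_entry;
    // ~870 typical entries per buffered read with these assumed numbers, while
    // the doc comment above notes the scan also avoids copying large account data.
    println!("~{entries_per_read} typical accounts per buffered read");
}
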
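And since this commit makes scan_pubkeys pub, code outside the crate can drive it with a plain FnMut closure; as the doc comment notes, nothing borrowed from the file needs to outlive the callback. A minimal usage sketch (collect_pubkeys is a hypothetical helper, not part of this commit):

use {solana_accounts_db::append_vec::AppendVec, solana_sdk::pubkey::Pubkey};

/// Hypothetical helper: copy every pubkey stored in an append vec into a Vec.
fn collect_pubkeys(append_vec: &AppendVec) -> Vec<Pubkey> {
    let mut pubkeys = Vec::new();
    // Pubkey is Copy, so each key is copied out; no reference into the backing
    // file escapes the callback.
    append_vec.scan_pubkeys(|pubkey| pubkeys.push(*pubkey));
    pubkeys
}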
