From 13c18ca93c99b7dba334b0171a2162bff5df0d73 Mon Sep 17 00:00:00 2001 From: brooks Date: Wed, 10 Jul 2024 10:49:53 -0400 Subject: [PATCH 1/3] Optimizes AppendVec::get_account_sizes() when using file io --- accounts-db/src/append_vec.rs | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/accounts-db/src/append_vec.rs b/accounts-db/src/append_vec.rs index 2b5241aaf5ad59..8570954cffd2cf 100644 --- a/accounts-db/src/append_vec.rs +++ b/accounts-db/src/append_vec.rs @@ -983,7 +983,7 @@ impl AppendVec { /// for each offset in `sorted_offsets`, get the size of the account. No other information is needed for the account. pub(crate) fn get_account_sizes(&self, sorted_offsets: &[usize]) -> Vec { - let mut result = Vec::with_capacity(sorted_offsets.len()); + let mut account_sizes = Vec::with_capacity(sorted_offsets.len()); match &self.backing { AppendVecFileBacking::Mmap(Mmap { mmap, .. }) => { let slice = self.get_valid_slice_from_mmap(mmap); @@ -996,18 +996,34 @@ impl AppendVec { // data doesn't fit, so don't include break; } - result.push(next.stored_size_aligned); + account_sizes.push(next.stored_size_aligned); } } - AppendVecFileBacking::File(_file) => { - for &offset in sorted_offsets { - self.get_stored_account_meta_callback(offset, |stored_meta| { - result.push(stored_meta.stored_size()); - }); + AppendVecFileBacking::File(file) => { + let buffer_size = std::cmp::min(SCAN_BUFFER_SIZE, self.len()); + let mut reader = + BufferedReader::new(buffer_size, self.len(), file, STORE_META_OVERHEAD); + + let mut prev_offset = 0; + for &curr_offset in sorted_offsets { + debug_assert!( + curr_offset >= prev_offset, + "sorted_offsets must be sorted: {sorted_offsets:?}", + ); + reader.advance_offset(curr_offset - prev_offset); + let read_result = reader.read(); + if read_result.ok() != Some(BufferedReaderStatus::Success) { + break; + } + let (_offset, bytes) = reader.get_offset_and_data(); + let (stored_meta, _next) = Self::get_type::(bytes, 0).unwrap(); + let stored_size = aligned_stored_size(stored_meta.data_len as usize); + account_sizes.push(stored_size); + prev_offset = curr_offset } } } - result + account_sizes } /// iterate over all pubkeys and call `callback`. From 180c69f57c08084c6ff754def7589d14c2e6ab7a Mon Sep 17 00:00:00 2001 From: brooks Date: Wed, 10 Jul 2024 11:38:48 -0400 Subject: [PATCH 2/3] reimpl --- accounts-db/src/append_vec.rs | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/accounts-db/src/append_vec.rs b/accounts-db/src/append_vec.rs index 8570954cffd2cf..a6804f215c399d 100644 --- a/accounts-db/src/append_vec.rs +++ b/accounts-db/src/append_vec.rs @@ -1000,26 +1000,21 @@ impl AppendVec { } } AppendVecFileBacking::File(file) => { - let buffer_size = std::cmp::min(SCAN_BUFFER_SIZE, self.len()); - let mut reader = - BufferedReader::new(buffer_size, self.len(), file, STORE_META_OVERHEAD); - - let mut prev_offset = 0; - for &curr_offset in sorted_offsets { - debug_assert!( - curr_offset >= prev_offset, - "sorted_offsets must be sorted: {sorted_offsets:?}", - ); - reader.advance_offset(curr_offset - prev_offset); - let read_result = reader.read(); - if read_result.ok() != Some(BufferedReaderStatus::Success) { + // self.len() is an atomic load, so only do it once + let valid_file_len = self.len(); + let mut buffer = [0u8; mem::size_of::()]; + for offset in sorted_offsets { + let Some(bytes_read) = + read_into_buffer(file, valid_file_len, *offset, &mut buffer).ok() + else { break; - } - let (_offset, bytes) = reader.get_offset_and_data(); - let (stored_meta, _next) = Self::get_type::(bytes, 0).unwrap(); + }; + let bytes = ValidSlice(&buffer[..bytes_read]); + let Some((stored_meta, _next)) = Self::get_type::(bytes, 0) else { + break; + }; let stored_size = aligned_stored_size(stored_meta.data_len as usize); account_sizes.push(stored_size); - prev_offset = curr_offset } } } From e999df3f1298b8e1fd910c651e23f763fab1d033 Mon Sep 17 00:00:00 2001 From: brooks Date: Thu, 11 Jul 2024 12:01:09 -0400 Subject: [PATCH 3/3] pr: add assert --- accounts-db/src/append_vec.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/accounts-db/src/append_vec.rs b/accounts-db/src/append_vec.rs index a6804f215c399d..cc377f56114c35 100644 --- a/accounts-db/src/append_vec.rs +++ b/accounts-db/src/append_vec.rs @@ -1013,6 +1013,12 @@ impl AppendVec { let Some((stored_meta, _next)) = Self::get_type::(bytes, 0) else { break; }; + // Since we're only reading the StoredMeta and not the whole account, do a + // quick sanity check that there is likely a valid account at this offset. + debug_assert!( + *offset + STORE_META_OVERHEAD + stored_meta.data_len as usize + <= valid_file_len + ); let stored_size = aligned_stored_size(stored_meta.data_len as usize); account_sizes.push(stored_size); }