Skip to content

Commit

Permalink
refactor: cache inverted index with fixed-size page (#5114)
Browse files Browse the repository at this point in the history
* feat: cache inverted index by page instead of file

* fix: add unit test and fix bugs

* chore: typo

* chore: ci

* fix: math

* chore: apply review comments

* chore: renames

* test: add unit test for index key calculation

* refactor: use ReadableSize

* feat: add config for inverted index page size

* chore: update config file

* refactor: handle multiple range read and fix some related bugs

* fix: add config

* test: turn to a fs reader to match behaviors of object store
  • Loading branch information
CookiePieWw authored and evenyag committed Dec 20, 2024
1 parent 8b1484c commit fdccf4f
Show file tree
Hide file tree
Showing 18 changed files with 434 additions and 69 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions config/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@
| `region_engine.mito.inverted_index.intermediate_path` | String | `""` | Deprecated, use `region_engine.mito.index.aux_path` instead. |
| `region_engine.mito.inverted_index.metadata_cache_size` | String | `64MiB` | Cache size for inverted index metadata. |
| `region_engine.mito.inverted_index.content_cache_size` | String | `128MiB` | Cache size for inverted index content. |
| `region_engine.mito.inverted_index.content_cache_page_size` | String | `8MiB` | Page size for inverted index content cache. |
| `region_engine.mito.fulltext_index` | -- | -- | The options for full-text index in Mito engine. |
| `region_engine.mito.fulltext_index.create_on_flush` | String | `auto` | Whether to create the index on flush.<br/>- `auto`: automatically (default)<br/>- `disable`: never |
| `region_engine.mito.fulltext_index.create_on_compaction` | String | `auto` | Whether to create the index on compaction.<br/>- `auto`: automatically (default)<br/>- `disable`: never |
Expand Down Expand Up @@ -475,6 +476,9 @@
| `region_engine.mito.inverted_index.apply_on_query` | String | `auto` | Whether to apply the index on query<br/>- `auto`: automatically (default)<br/>- `disable`: never |
| `region_engine.mito.inverted_index.mem_threshold_on_create` | String | `auto` | Memory threshold for performing an external sort during index creation.<br/>- `auto`: automatically determine the threshold based on the system memory size (default)<br/>- `unlimited`: no memory limit<br/>- `[size]` e.g. `64MB`: fixed memory threshold |
| `region_engine.mito.inverted_index.intermediate_path` | String | `""` | Deprecated, use `region_engine.mito.index.aux_path` instead. |
| `region_engine.mito.inverted_index.metadata_cache_size` | String | `64MiB` | Cache size for inverted index metadata. |
| `region_engine.mito.inverted_index.content_cache_size` | String | `128MiB` | Cache size for inverted index content. |
| `region_engine.mito.inverted_index.content_cache_page_size` | String | `8MiB` | Page size for inverted index content cache. |
| `region_engine.mito.fulltext_index` | -- | -- | The options for full-text index in Mito engine. |
| `region_engine.mito.fulltext_index.create_on_flush` | String | `auto` | Whether to create the index on flush.<br/>- `auto`: automatically (default)<br/>- `disable`: never |
| `region_engine.mito.fulltext_index.create_on_compaction` | String | `auto` | Whether to create the index on compaction.<br/>- `auto`: automatically (default)<br/>- `disable`: never |
Expand Down
9 changes: 9 additions & 0 deletions config/datanode.example.toml
Original file line number Diff line number Diff line change
Expand Up @@ -543,6 +543,15 @@ mem_threshold_on_create = "auto"
## Deprecated, use `region_engine.mito.index.aux_path` instead.
intermediate_path = ""

## Cache size for inverted index metadata.
metadata_cache_size = "64MiB"

## Cache size for inverted index content.
content_cache_size = "128MiB"

## Page size for inverted index content cache.
content_cache_page_size = "8MiB"

## The options for full-text index in Mito engine.
[region_engine.mito.fulltext_index]

Expand Down
3 changes: 3 additions & 0 deletions config/standalone.example.toml
Original file line number Diff line number Diff line change
Expand Up @@ -588,6 +588,9 @@ metadata_cache_size = "64MiB"
## Cache size for inverted index content.
content_cache_size = "128MiB"

## Page size for inverted index content cache.
content_cache_page_size = "8MiB"

## The options for full-text index in Mito engine.
[region_engine.mito.fulltext_index]

Expand Down
4 changes: 1 addition & 3 deletions src/common/base/src/range_read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -205,9 +205,7 @@ impl RangeReader for Vec<u8> {
})
}

async fn read(&mut self, mut range: Range<u64>) -> io::Result<Bytes> {
range.end = range.end.min(self.len() as u64);

async fn read(&mut self, range: Range<u64>) -> io::Result<Bytes> {
let bytes = Bytes::copy_from_slice(&self[range.start as usize..range.end as usize]);
Ok(bytes)
}
Expand Down
13 changes: 7 additions & 6 deletions src/index/src/inverted_index/format/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::ops::Range;
use std::sync::Arc;

use async_trait::async_trait;
Expand All @@ -30,23 +31,23 @@ mod footer;
#[mockall::automock]
#[async_trait]
pub trait InvertedIndexReader: Send {
/// Reads all data to dest.
async fn read_all(&mut self, dest: &mut Vec<u8>) -> Result<usize>;

/// Reads data of exactly the provided size, starting at the given offset.
async fn seek_read(&mut self, offset: u64, size: u32) -> Result<Vec<u8>>;
async fn range_read(&mut self, offset: u64, size: u32) -> Result<Vec<u8>>;

/// Reads the bytes in the given ranges.
async fn read_vec(&mut self, ranges: &[Range<u64>]) -> Result<Vec<Vec<u8>>>;

/// Retrieves metadata of all inverted indices stored within the blob.
async fn metadata(&mut self) -> Result<Arc<InvertedIndexMetas>>;

/// Retrieves the finite state transducer (FST) map from the given offset and size.
async fn fst(&mut self, offset: u64, size: u32) -> Result<FstMap> {
let fst_data = self.seek_read(offset, size).await?;
let fst_data = self.range_read(offset, size).await?;
FstMap::new(fst_data).context(DecodeFstSnafu)
}

/// Retrieves the bitmap from the given offset and size.
async fn bitmap(&mut self, offset: u64, size: u32) -> Result<BitVec> {
self.seek_read(offset, size).await.map(BitVec::from_vec)
self.range_read(offset, size).await.map(BitVec::from_vec)
}
}
17 changes: 7 additions & 10 deletions src/index/src/inverted_index/format/reader/blob.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::ops::Range;
use std::sync::Arc;

use async_trait::async_trait;
Expand Down Expand Up @@ -50,16 +51,7 @@ impl<R> InvertedIndexBlobReader<R> {

#[async_trait]
impl<R: RangeReader> InvertedIndexReader for InvertedIndexBlobReader<R> {
async fn read_all(&mut self, dest: &mut Vec<u8>) -> Result<usize> {
let metadata = self.source.metadata().await.context(CommonIoSnafu)?;
self.source
.read_into(0..metadata.content_length, dest)
.await
.context(CommonIoSnafu)?;
Ok(metadata.content_length as usize)
}

async fn seek_read(&mut self, offset: u64, size: u32) -> Result<Vec<u8>> {
async fn range_read(&mut self, offset: u64, size: u32) -> Result<Vec<u8>> {
let buf = self
.source
.read(offset..offset + size as u64)
Expand All @@ -68,6 +60,11 @@ impl<R: RangeReader> InvertedIndexReader for InvertedIndexBlobReader<R> {
Ok(buf.into())
}

/// Reads the bytes of every range in `ranges` from the underlying source.
///
/// The whole batch is forwarded to the source's own `read_vec`; a failure
/// there is surfaced with the `CommonIoSnafu` context. Each buffer that comes
/// back is converted into a `Vec<u8>`.
async fn read_vec(&mut self, ranges: &[Range<u64>]) -> Result<Vec<Vec<u8>>> {
    let owned: Vec<Vec<u8>> = self
        .source
        .read_vec(ranges)
        .await
        .context(CommonIoSnafu)?
        .into_iter()
        .map(Into::into)
        .collect();
    Ok(owned)
}

async fn metadata(&mut self) -> Result<Arc<InvertedIndexMetas>> {
let metadata = self.source.metadata().await.context(CommonIoSnafu)?;
let blob_size = metadata.content_length;
Expand Down
1 change: 1 addition & 0 deletions src/mito2/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ aquamarine.workspace = true
async-channel = "1.9"
async-stream.workspace = true
async-trait = "0.1"
bytemuck.workspace = true
bytes.workspace = true
common-base.workspace = true
common-config.workspace = true
Expand Down
14 changes: 12 additions & 2 deletions src/mito2/src/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ pub struct CacheManagerBuilder {
page_cache_size: u64,
index_metadata_size: u64,
index_content_size: u64,
index_content_page_size: u64,
puffin_metadata_size: u64,
write_cache: Option<WriteCacheRef>,
selector_result_cache_size: u64,
Expand Down Expand Up @@ -286,6 +287,12 @@ impl CacheManagerBuilder {
self
}

/// Sets page size for index content.
///
/// `bytes` is stored on the builder and is later passed to
/// `InvertedIndexCache::new` together with the index metadata size and the
/// index content size. Returns the builder so that calls can be chained.
pub fn index_content_page_size(mut self, bytes: u64) -> Self {
self.index_content_page_size = bytes;
self
}

/// Sets cache size for puffin metadata.
pub fn puffin_metadata_size(mut self, bytes: u64) -> Self {
self.puffin_metadata_size = bytes;
Expand Down Expand Up @@ -352,8 +359,11 @@ impl CacheManagerBuilder {
})
.build()
});
let inverted_index_cache =
InvertedIndexCache::new(self.index_metadata_size, self.index_content_size);
let inverted_index_cache = InvertedIndexCache::new(
self.index_metadata_size,
self.index_content_size,
self.index_content_page_size,
);
let puffin_metadata_cache =
PuffinMetadataCache::new(self.puffin_metadata_size, &CACHE_BYTES);
let selector_result_cache = (self.selector_result_cache_size != 0).then(|| {
Expand Down
Loading

0 comments on commit fdccf4f

Please sign in to comment.