Skip to content

Commit

Permalink
feat(bloom-filter): add basic bloom filter creator (Part 1) (#5177)
Browse files Browse the repository at this point in the history
* feat(bloom-filter): add a simple bloom filter creator (Part 1)

Signed-off-by: Zhenchi <[email protected]>

* fix: clippy

Signed-off-by: Zhenchi <[email protected]>

* fix: header

Signed-off-by: Zhenchi <[email protected]>

* docs: add format comment

Signed-off-by: Zhenchi <[email protected]>

---------

Signed-off-by: Zhenchi <[email protected]>
  • Loading branch information
zhongzc authored and evenyag committed Dec 20, 2024
1 parent ffdcb8c commit bcecd8c
Show file tree
Hide file tree
Showing 6 changed files with 439 additions and 3 deletions.
26 changes: 23 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions src/index/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ common-error.workspace = true
common-macro.workspace = true
common-runtime.workspace = true
common-telemetry.workspace = true
fastbloom = "0.8"
fst.workspace = true
futures.workspace = true
greptime-proto.workspace = true
Expand All @@ -26,6 +27,7 @@ prost.workspace = true
regex.workspace = true
regex-automata.workspace = true
serde.workspace = true
serde_json.workspace = true
snafu.workspace = true
tantivy = { version = "0.22", features = ["zstd-compression"] }
tantivy-jieba = "0.11.0"
Expand Down
53 changes: 53 additions & 0 deletions src/index/src/bloom_filter.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use serde::{Deserialize, Serialize};

// Publicly exported submodule.
// NOTE(review): presumably holds the bloom filter creator — its contents are not visible here.
pub mod creator;
// Private submodule; by its name it presumably defines this module's error types — confirm.
mod error;

/// An owned, growable byte buffer.
pub type Bytes = Vec<u8>;
/// A borrowed view of a byte slice, valid for the lifetime `'a`.
pub type BytesRef<'a> = &'a [u8];

/// Metadata describing the bloom filter data stored in a file.
///
/// The type derives `Serialize` and `Deserialize`, so it can be written to and
/// read back from a serde-supported format. `Default` yields zeroed counters
/// and an empty segment list.
// NOTE(review): the concrete on-disk encoding is not visible in this file —
// presumably JSON via `serde_json`; confirm against the `creator` module.
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct BloomFilterMeta {
/// The number of rows covered by each segment.
pub rows_per_segment: usize,

/// The number of segments.
// NOTE(review): presumably equal to `bloom_filter_segments.len()` — confirm with the writer.
pub seg_count: usize,

/// The total number of rows.
pub row_count: usize,

/// The size of the bloom filter data, excluding this meta information.
// NOTE(review): unit is presumably bytes — confirm with the writer.
pub bloom_filter_segments_size: usize,

/// The location (offset and size) of each bloom filter segment in the file.
pub bloom_filter_segments: Vec<BloomFilterSegmentLocation>,
}

/// The location of a single bloom filter segment in the file.
///
/// Stored as an element of [`BloomFilterMeta::bloom_filter_segments`] and
/// (de)serialized together with it through the serde derives.
#[derive(Debug, Serialize, Deserialize)]
pub struct BloomFilterSegmentLocation {
/// The offset of the bloom filter segment in the file.
// NOTE(review): presumably a byte offset from the start of the file — confirm with the writer.
pub offset: u64,

/// The size of the bloom filter segment in the file.
// NOTE(review): unit is presumably bytes — confirm with the writer.
pub size: u64,

/// The number of elements in the bloom filter segment.
pub elem_count: usize,
}
Loading

0 comments on commit bcecd8c

Please sign in to comment.