Skip to content

Commit

Permalink
feat: introduce SKIPPING index (part 1) (#5155)
Browse files Browse the repository at this point in the history
* skip index parser

Signed-off-by: Ruihang Xia <[email protected]>

* wip: sqlness

Signed-off-by: Ruihang Xia <[email protected]>

* impl show create part

Signed-off-by: Ruihang Xia <[email protected]>

* add empty line

Signed-off-by: Ruihang Xia <[email protected]>

* change keyword to SKIPPING INDEX

Signed-off-by: Ruihang Xia <[email protected]>

* rename local variables

Signed-off-by: Ruihang Xia <[email protected]>

---------

Signed-off-by: Ruihang Xia <[email protected]>
  • Loading branch information
waynexia authored Dec 16, 2024
1 parent 5ffda7e commit 5469832
Show file tree
Hide file tree
Showing 14 changed files with 371 additions and 21 deletions.
12 changes: 11 additions & 1 deletion src/api/src/v1/column_def.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use std::collections::HashMap;

use datatypes::schema::{
ColumnDefaultConstraint, ColumnSchema, FulltextAnalyzer, FulltextOptions, COMMENT_KEY,
FULLTEXT_KEY, INVERTED_INDEX_KEY,
FULLTEXT_KEY, INVERTED_INDEX_KEY, SKIPPING_INDEX_KEY,
};
use greptime_proto::v1::Analyzer;
use snafu::ResultExt;
Expand All @@ -29,6 +29,8 @@ use crate::v1::{ColumnDef, ColumnOptions, SemanticType};
const FULLTEXT_GRPC_KEY: &str = "fulltext";
/// Key used to store inverted index options in gRPC column options.
const INVERTED_INDEX_GRPC_KEY: &str = "inverted_index";
/// Key used to store skip index options in gRPC column options.
const SKIPPING_INDEX_GRPC_KEY: &str = "skipping_index";

/// Tries to construct a `ColumnSchema` from the given `ColumnDef`.
pub fn try_as_column_schema(column_def: &ColumnDef) -> Result<ColumnSchema> {
Expand Down Expand Up @@ -60,6 +62,9 @@ pub fn try_as_column_schema(column_def: &ColumnDef) -> Result<ColumnSchema> {
if let Some(inverted_index) = options.options.get(INVERTED_INDEX_GRPC_KEY) {
metadata.insert(INVERTED_INDEX_KEY.to_string(), inverted_index.clone());
}
if let Some(skipping_index) = options.options.get(SKIPPING_INDEX_GRPC_KEY) {
metadata.insert(SKIPPING_INDEX_KEY.to_string(), skipping_index.clone());
}
}

ColumnSchema::new(&column_def.name, data_type.into(), column_def.is_nullable)
Expand All @@ -84,6 +89,11 @@ pub fn options_from_column_schema(column_schema: &ColumnSchema) -> Option<Column
.options
.insert(INVERTED_INDEX_GRPC_KEY.to_string(), inverted_index.clone());
}
if let Some(skipping_index) = column_schema.metadata().get(SKIPPING_INDEX_KEY) {
options
.options
.insert(SKIPPING_INDEX_GRPC_KEY.to_string(), skipping_index.clone());
}

(!options.options.is_empty()).then_some(options)
}
Expand Down
9 changes: 8 additions & 1 deletion src/datatypes/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,12 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Invalid skipping index option: {}", msg))]
InvalidSkippingIndexOption {
msg: String,
#[snafu(implicit)]
location: Location,
},
}

impl ErrorExt for Error {
Expand All @@ -252,7 +258,8 @@ impl ErrorExt for Error {
| InvalidPrecisionOrScale { .. }
| InvalidJson { .. }
| InvalidVector { .. }
| InvalidFulltextOption { .. } => StatusCode::InvalidArguments,
| InvalidFulltextOption { .. }
| InvalidSkippingIndexOption { .. } => StatusCode::InvalidArguments,

ValueExceedsPrecision { .. }
| CastType { .. }
Expand Down
7 changes: 4 additions & 3 deletions src/datatypes/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,11 @@ use snafu::{ensure, ResultExt};
use crate::error::{self, DuplicateColumnSnafu, Error, ProjectArrowSchemaSnafu, Result};
use crate::prelude::ConcreteDataType;
pub use crate::schema::column_schema::{
ColumnSchema, FulltextAnalyzer, FulltextOptions, Metadata,
ColumnSchema, FulltextAnalyzer, FulltextOptions, Metadata, SkippingIndexOptions,
COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE, COLUMN_FULLTEXT_OPT_KEY_ANALYZER,
COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COMMENT_KEY, FULLTEXT_KEY, INVERTED_INDEX_KEY,
TIME_INDEX_KEY,
COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY,
COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY, FULLTEXT_KEY, INVERTED_INDEX_KEY,
SKIPPING_INDEX_KEY, TIME_INDEX_KEY,
};
pub use crate::schema::constraint::ColumnDefaultConstraint;
pub use crate::schema::raw::RawSchema;
Expand Down
106 changes: 106 additions & 0 deletions src/datatypes/src/schema/column_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,20 @@ const DEFAULT_CONSTRAINT_KEY: &str = "greptime:default_constraint";
pub const FULLTEXT_KEY: &str = "greptime:fulltext";
/// Key used to store whether the column has inverted index in arrow field's metadata.
pub const INVERTED_INDEX_KEY: &str = "greptime:inverted_index";
/// Key used to store skipping index options in arrow field's metadata.
///
/// The value stored under this key is the JSON-serialized `SkippingIndexOptions`.
pub const SKIPPING_INDEX_KEY: &str = "greptime:skipping_index";

/// Keys used in fulltext options
pub const COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE: &str = "enable";
pub const COLUMN_FULLTEXT_OPT_KEY_ANALYZER: &str = "analyzer";
pub const COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE: &str = "case_sensitive";

/// Keys used in SKIPPING index options
///
/// Option key holding the granularity of the skipping index.
pub const COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY: &str = "granularity";
/// Option key holding the type of the skipping index.
pub const COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE: &str = "type";

/// Granularity used when the `granularity` option is absent while building
/// `SkippingIndexOptions` from a string option map.
pub const DEFAULT_GRANULARITY: u32 = 10240;

/// Schema of a column, used as an immutable struct.
#[derive(Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ColumnSchema {
Expand Down Expand Up @@ -298,6 +306,34 @@ impl ColumnSchema {
);
Ok(())
}

/// Retrieves the skipping index options for the column.
pub fn skipping_index_options(&self) -> Result<Option<SkippingIndexOptions>> {
match self.metadata.get(SKIPPING_INDEX_KEY) {
None => Ok(None),
Some(json) => {
let options =
serde_json::from_str(json).context(error::DeserializeSnafu { json })?;
Ok(Some(options))
}
}
}

pub fn with_skipping_options(mut self, options: SkippingIndexOptions) -> Result<Self> {
self.metadata.insert(
SKIPPING_INDEX_KEY.to_string(),
serde_json::to_string(&options).context(error::SerializeSnafu)?,
);
Ok(self)
}

pub fn set_skipping_options(&mut self, options: &SkippingIndexOptions) -> Result<()> {
self.metadata.insert(
SKIPPING_INDEX_KEY.to_string(),
serde_json::to_string(options).context(error::SerializeSnafu)?,
);
Ok(())
}
}

/// Column extended type set in column schema's metadata.
Expand Down Expand Up @@ -495,6 +531,76 @@ impl fmt::Display for FulltextAnalyzer {
}
}

/// Skipping options for a column.
///
/// Serialized with kebab-case field names, so `index_type` is written as
/// `index-type` in the JSON stored in the column metadata.
// NOTE(review): the derived `Default` yields `granularity: 0`, which differs
// from `DEFAULT_GRANULARITY` used by the `TryFrom<HashMap<..>>` conversion —
// confirm whether a manual `Default` impl is intended.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default, Visit, VisitMut)]
#[serde(rename_all = "kebab-case")]
pub struct SkippingIndexOptions {
/// The granularity of the skip index.
pub granularity: u32,
/// The type of the skip index.
// Falls back to `SkipIndexType::default()` when the field is missing on deserialization.
#[serde(default)]
pub index_type: SkipIndexType,
}

/// Renders the options as `granularity=<n>, index_type=<type>`.
impl fmt::Display for SkippingIndexOptions {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            granularity,
            index_type,
        } = self;
        write!(f, "granularity={}, index_type={}", granularity, index_type)
    }
}

/// Skip index types.
///
/// `BloomFilter` is the default variant.
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, Visit, VisitMut)]
pub enum SkipIndexType {
/// Bloom filter index; displayed and parsed as `BLOOM`.
#[default]
BloomFilter,
}

/// Renders the index type using its SQL option spelling (e.g. `BLOOM`).
impl fmt::Display for SkipIndexType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self {
            SkipIndexType::BloomFilter => "BLOOM",
        };
        f.write_str(label)
    }
}

/// Builds [`SkippingIndexOptions`] from a string-keyed option map.
///
/// Recognized keys:
/// - `granularity`: a positive integer; defaults to `DEFAULT_GRANULARITY` when absent.
/// - `type`: the index type, matched case-insensitively (only `BLOOM` is
///   accepted); defaults to `SkipIndexType::default()` when absent.
///
/// Keys other than these two are ignored.
///
/// # Errors
///
/// Returns `InvalidSkippingIndexOption` when the granularity is not a positive
/// integer that fits in `u32`, or when the index type is not recognized.
impl TryFrom<HashMap<String, String>> for SkippingIndexOptions {
    type Error = Error;

    fn try_from(options: HashMap<String, String>) -> Result<Self> {
        // Parse granularity, falling back to `DEFAULT_GRANULARITY` when unset.
        let granularity = match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY) {
            Some(value) => value
                .parse::<u32>()
                .ok()
                // `0` parses as a valid `u32` but is not a positive integer,
                // so reject it explicitly to honor the documented contract.
                .filter(|&parsed| parsed > 0)
                .ok_or_else(|| {
                    error::InvalidSkippingIndexOptionSnafu {
                        msg: format!("Invalid granularity: {value}, expected: positive integer"),
                    }
                    .build()
                })?,
            None => DEFAULT_GRANULARITY,
        };

        // Parse index type with default value BloomFilter
        let index_type = match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE) {
            Some(typ) => match typ.to_ascii_uppercase().as_str() {
                "BLOOM" => SkipIndexType::BloomFilter,
                _ => {
                    return error::InvalidSkippingIndexOptionSnafu {
                        msg: format!("Invalid index type: {typ}, expected: 'BLOOM'"),
                    }
                    .fail();
                }
            },
            None => SkipIndexType::default(),
        };

        Ok(SkippingIndexOptions {
            granularity,
            index_type,
        })
    }
}

#[cfg(test)]
mod tests {
use std::sync::Arc;
Expand Down
3 changes: 2 additions & 1 deletion src/operator/src/statement/ddl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,8 @@ impl StatementExecutor {

table_info.ident.table_id = table_id;

let table_info = Arc::new(table_info.try_into().context(CreateTableInfoSnafu)?);
let table_info: Arc<TableInfo> =
Arc::new(table_info.try_into().context(CreateTableInfoSnafu)?);
create_table.table_id = Some(api::v1::TableId { id: table_id });

let table = DistTable::table(table_info);
Expand Down
11 changes: 10 additions & 1 deletion src/query/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,13 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},

#[snafu(display("Failed to get SKIPPING index options"))]
GetSkippingIndexOptions {
source: datatypes::error::Error,
#[snafu(implicit)]
location: Location,
},
}

impl ErrorExt for Error {
Expand Down Expand Up @@ -366,7 +373,9 @@ impl ErrorExt for Error {
MissingTableMutationHandler { .. } => StatusCode::Unexpected,
GetRegionMetadata { .. } => StatusCode::RegionNotReady,
TableReadOnly { .. } => StatusCode::Unsupported,
GetFulltextOptions { source, .. } => source.status_code(),
GetFulltextOptions { source, .. } | GetSkippingIndexOptions { source, .. } => {
source.status_code()
}
}
}

Expand Down
34 changes: 29 additions & 5 deletions src/query/src/sql/show_create_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ use std::collections::HashMap;
use common_meta::SchemaOptions;
use datatypes::schema::{
ColumnDefaultConstraint, ColumnSchema, SchemaRef, COLUMN_FULLTEXT_OPT_KEY_ANALYZER,
COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COMMENT_KEY,
COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY,
COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY,
};
use snafu::ResultExt;
use sql::ast::{ColumnDef, ColumnOption, ColumnOptionDef, Expr, Ident, ObjectName};
Expand All @@ -32,7 +33,8 @@ use table::metadata::{TableInfoRef, TableMeta};
use table::requests::{FILE_TABLE_META_KEY, TTL_KEY, WRITE_BUFFER_SIZE_KEY};

use crate::error::{
ConvertSqlTypeSnafu, ConvertSqlValueSnafu, GetFulltextOptionsSnafu, Result, SqlSnafu,
ConvertSqlTypeSnafu, ConvertSqlValueSnafu, GetFulltextOptionsSnafu,
GetSkippingIndexOptionsSnafu, Result, SqlSnafu,
};

/// Generates CREATE TABLE options from given table metadata and schema-level options.
Expand Down Expand Up @@ -115,6 +117,23 @@ fn create_column(column_schema: &ColumnSchema, quote_style: char) -> Result<Colu
extensions.fulltext_options = Some(map.into());
}

if let Some(opt) = column_schema
.skipping_index_options()
.context(GetSkippingIndexOptionsSnafu)?
{
let map = HashMap::from([
(
COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY.to_string(),
opt.granularity.to_string(),
),
(
COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE.to_string(),
opt.index_type.to_string(),
),
]);
extensions.skipping_index_options = Some(map.into());
}

Ok(Column {
column_def: ColumnDef {
name: Ident::with_quote(quote_style, name),
Expand Down Expand Up @@ -219,7 +238,7 @@ mod tests {

use common_time::timestamp::TimeUnit;
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::{FulltextOptions, Schema, SchemaRef};
use datatypes::schema::{FulltextOptions, Schema, SchemaRef, SkippingIndexOptions};
use table::metadata::*;
use table::requests::{
TableOptions, FILE_TABLE_FORMAT_KEY, FILE_TABLE_LOCATION_KEY, FILE_TABLE_META_KEY,
Expand All @@ -230,7 +249,12 @@ mod tests {
#[test]
fn test_show_create_table_sql() {
let schema = vec![
ColumnSchema::new("id", ConcreteDataType::uint32_datatype(), true),
ColumnSchema::new("id", ConcreteDataType::uint32_datatype(), true)
.with_skipping_options(SkippingIndexOptions {
granularity: 4096,
..Default::default()
})
.unwrap(),
ColumnSchema::new("host", ConcreteDataType::string_datatype(), true)
.set_inverted_index(true),
ColumnSchema::new("cpu", ConcreteDataType::float64_datatype(), true),
Expand Down Expand Up @@ -300,7 +324,7 @@ mod tests {
assert_eq!(
r#"
CREATE TABLE IF NOT EXISTS "system_metrics" (
"id" INT UNSIGNED NULL,
"id" INT UNSIGNED NULL SKIPPING INDEX WITH(granularity = '4096', type = 'BLOOM'),
"host" STRING NULL,
"cpu" DOUBLE NULL,
"disk" FLOAT NULL,
Expand Down
9 changes: 8 additions & 1 deletion src/sql/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,13 @@ pub enum Error {
location: Location,
},

#[snafu(display("Failed to set SKIPPING index option"))]
SetSkippingIndexOption {
source: datatypes::error::Error,
#[snafu(implicit)]
location: Location,
},

#[snafu(display("Datatype error: {}", source))]
Datatype {
source: datatypes::error::Error,
Expand Down Expand Up @@ -375,7 +382,7 @@ impl ErrorExt for Error {
ConvertSqlValue { .. } | ConvertValue { .. } => StatusCode::Unsupported,

PermissionDenied { .. } => StatusCode::PermissionDenied,
SetFulltextOption { .. } => StatusCode::Unexpected,
SetFulltextOption { .. } | SetSkippingIndexOption { .. } => StatusCode::Unexpected,
}
}

Expand Down
Loading

0 comments on commit 5469832

Please sign in to comment.