diff --git a/src/api/src/v1/column_def.rs b/src/api/src/v1/column_def.rs index f026d3f6f97f..77dcd2c62190 100644 --- a/src/api/src/v1/column_def.rs +++ b/src/api/src/v1/column_def.rs @@ -16,7 +16,7 @@ use std::collections::HashMap; use datatypes::schema::{ ColumnDefaultConstraint, ColumnSchema, FulltextAnalyzer, FulltextOptions, COMMENT_KEY, - FULLTEXT_KEY, INVERTED_INDEX_KEY, + FULLTEXT_KEY, INVERTED_INDEX_KEY, SKIPPING_INDEX_KEY, }; use greptime_proto::v1::Analyzer; use snafu::ResultExt; @@ -29,6 +29,8 @@ use crate::v1::{ColumnDef, ColumnOptions, SemanticType}; const FULLTEXT_GRPC_KEY: &str = "fulltext"; /// Key used to store inverted index options in gRPC column options. const INVERTED_INDEX_GRPC_KEY: &str = "inverted_index"; +/// Key used to store skip index options in gRPC column options. +const SKIPPING_INDEX_GRPC_KEY: &str = "skipping_index"; /// Tries to construct a `ColumnSchema` from the given `ColumnDef`. pub fn try_as_column_schema(column_def: &ColumnDef) -> Result { @@ -60,6 +62,9 @@ pub fn try_as_column_schema(column_def: &ColumnDef) -> Result { if let Some(inverted_index) = options.options.get(INVERTED_INDEX_GRPC_KEY) { metadata.insert(INVERTED_INDEX_KEY.to_string(), inverted_index.clone()); } + if let Some(skipping_index) = options.options.get(SKIPPING_INDEX_GRPC_KEY) { + metadata.insert(SKIPPING_INDEX_KEY.to_string(), skipping_index.clone()); + } } ColumnSchema::new(&column_def.name, data_type.into(), column_def.is_nullable) @@ -84,6 +89,11 @@ pub fn options_from_column_schema(column_schema: &ColumnSchema) -> Option StatusCode::InvalidArguments, + | InvalidFulltextOption { .. } + | InvalidSkippingIndexOption { .. } => StatusCode::InvalidArguments, ValueExceedsPrecision { .. } | CastType { .. 
} diff --git a/src/datatypes/src/schema.rs b/src/datatypes/src/schema.rs index 2eaa0254fbee..c537a4608b42 100644 --- a/src/datatypes/src/schema.rs +++ b/src/datatypes/src/schema.rs @@ -28,10 +28,11 @@ use snafu::{ensure, ResultExt}; use crate::error::{self, DuplicateColumnSnafu, Error, ProjectArrowSchemaSnafu, Result}; use crate::prelude::ConcreteDataType; pub use crate::schema::column_schema::{ - ColumnSchema, FulltextAnalyzer, FulltextOptions, Metadata, + ColumnSchema, FulltextAnalyzer, FulltextOptions, Metadata, SkippingIndexOptions, COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE, COLUMN_FULLTEXT_OPT_KEY_ANALYZER, - COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COMMENT_KEY, FULLTEXT_KEY, INVERTED_INDEX_KEY, - TIME_INDEX_KEY, + COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, + COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY, FULLTEXT_KEY, INVERTED_INDEX_KEY, + SKIPPING_INDEX_KEY, TIME_INDEX_KEY, }; pub use crate::schema::constraint::ColumnDefaultConstraint; pub use crate::schema::raw::RawSchema; diff --git a/src/datatypes/src/schema/column_schema.rs b/src/datatypes/src/schema/column_schema.rs index c1e2df846918..aee9efd9625d 100644 --- a/src/datatypes/src/schema/column_schema.rs +++ b/src/datatypes/src/schema/column_schema.rs @@ -39,12 +39,20 @@ const DEFAULT_CONSTRAINT_KEY: &str = "greptime:default_constraint"; pub const FULLTEXT_KEY: &str = "greptime:fulltext"; /// Key used to store whether the column has inverted index in arrow field's metadata. pub const INVERTED_INDEX_KEY: &str = "greptime:inverted_index"; +/// Key used to store skip options in arrow field's metadata. 
+pub const SKIPPING_INDEX_KEY: &str = "greptime:skipping_index"; /// Keys used in fulltext options pub const COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE: &str = "enable"; pub const COLUMN_FULLTEXT_OPT_KEY_ANALYZER: &str = "analyzer"; pub const COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE: &str = "case_sensitive"; +/// Keys used in SKIPPING index options +pub const COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY: &str = "granularity"; +pub const COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE: &str = "type"; + +pub const DEFAULT_GRANULARITY: u32 = 10240; + /// Schema of a column, used as an immutable struct. #[derive(Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct ColumnSchema { @@ -298,6 +306,34 @@ impl ColumnSchema { ); Ok(()) } + + /// Retrieves the skipping index options for the column. + pub fn skipping_index_options(&self) -> Result> { + match self.metadata.get(SKIPPING_INDEX_KEY) { + None => Ok(None), + Some(json) => { + let options = + serde_json::from_str(json).context(error::DeserializeSnafu { json })?; + Ok(Some(options)) + } + } + } + + pub fn with_skipping_options(mut self, options: SkippingIndexOptions) -> Result { + self.metadata.insert( + SKIPPING_INDEX_KEY.to_string(), + serde_json::to_string(&options).context(error::SerializeSnafu)?, + ); + Ok(self) + } + + pub fn set_skipping_options(&mut self, options: &SkippingIndexOptions) -> Result<()> { + self.metadata.insert( + SKIPPING_INDEX_KEY.to_string(), + serde_json::to_string(options).context(error::SerializeSnafu)?, + ); + Ok(()) + } } /// Column extended type set in column schema's metadata. @@ -495,6 +531,76 @@ impl fmt::Display for FulltextAnalyzer { } } +/// Skipping options for a column. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default, Visit, VisitMut)] +#[serde(rename_all = "kebab-case")] +pub struct SkippingIndexOptions { + /// The granularity of the skip index. + pub granularity: u32, + /// The type of the skip index. 
+ #[serde(default)] + pub index_type: SkipIndexType, +} + +impl fmt::Display for SkippingIndexOptions { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "granularity={}", self.granularity)?; + write!(f, ", index_type={}", self.index_type)?; + Ok(()) + } +} + +/// Skip index types. +#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, Visit, VisitMut)] +pub enum SkipIndexType { + #[default] + BloomFilter, +} + +impl fmt::Display for SkipIndexType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + SkipIndexType::BloomFilter => write!(f, "BLOOM"), + } + } +} + +impl TryFrom> for SkippingIndexOptions { + type Error = Error; + + fn try_from(options: HashMap) -> Result { + // Parse granularity, falling back to DEFAULT_GRANULARITY when the option is absent + let granularity = match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY) { + Some(value) => value.parse::().map_err(|_| { + error::InvalidSkippingIndexOptionSnafu { + msg: format!("Invalid granularity: {value}, expected: positive integer"), + } + .build() + })?, + None => DEFAULT_GRANULARITY, + }; + + // Parse index type with default value BloomFilter + let index_type = match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE) { + Some(typ) => match typ.to_ascii_uppercase().as_str() { + "BLOOM" => SkipIndexType::BloomFilter, + _ => { + return error::InvalidSkippingIndexOptionSnafu { + msg: format!("Invalid index type: {typ}, expected: 'BLOOM'"), + } + .fail(); + } + }, + None => SkipIndexType::default(), + }; + + Ok(SkippingIndexOptions { + granularity, + index_type, + }) + } +} + #[cfg(test)] mod tests { use std::sync::Arc; diff --git a/src/operator/src/statement/ddl.rs b/src/operator/src/statement/ddl.rs index ed96ca6f1833..eba88ee44d8a 100644 --- a/src/operator/src/statement/ddl.rs +++ b/src/operator/src/statement/ddl.rs @@ -271,7 +271,8 @@ impl StatementExecutor { table_info.ident.table_id = table_id; - let table_info = Arc::new(table_info.try_into().context(CreateTableInfoSnafu)?);
+ let table_info: Arc = + Arc::new(table_info.try_into().context(CreateTableInfoSnafu)?); create_table.table_id = Some(api::v1::TableId { id: table_id }); let table = DistTable::table(table_info); diff --git a/src/query/src/error.rs b/src/query/src/error.rs index 7e246d11c332..e696008cf546 100644 --- a/src/query/src/error.rs +++ b/src/query/src/error.rs @@ -316,6 +316,13 @@ pub enum Error { #[snafu(implicit)] location: Location, }, + + #[snafu(display("Failed to get SKIPPING index options"))] + GetSkippingIndexOptions { + source: datatypes::error::Error, + #[snafu(implicit)] + location: Location, + }, } impl ErrorExt for Error { @@ -366,7 +373,9 @@ impl ErrorExt for Error { MissingTableMutationHandler { .. } => StatusCode::Unexpected, GetRegionMetadata { .. } => StatusCode::RegionNotReady, TableReadOnly { .. } => StatusCode::Unsupported, - GetFulltextOptions { source, .. } => source.status_code(), + GetFulltextOptions { source, .. } | GetSkippingIndexOptions { source, .. } => { + source.status_code() + } } } diff --git a/src/query/src/sql/show_create_table.rs b/src/query/src/sql/show_create_table.rs index ca69dfc5e69e..b903509d2270 100644 --- a/src/query/src/sql/show_create_table.rs +++ b/src/query/src/sql/show_create_table.rs @@ -19,7 +19,8 @@ use std::collections::HashMap; use common_meta::SchemaOptions; use datatypes::schema::{ ColumnDefaultConstraint, ColumnSchema, SchemaRef, COLUMN_FULLTEXT_OPT_KEY_ANALYZER, - COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COMMENT_KEY, + COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, + COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY, }; use snafu::ResultExt; use sql::ast::{ColumnDef, ColumnOption, ColumnOptionDef, Expr, Ident, ObjectName}; @@ -32,7 +33,8 @@ use table::metadata::{TableInfoRef, TableMeta}; use table::requests::{FILE_TABLE_META_KEY, TTL_KEY, WRITE_BUFFER_SIZE_KEY}; use crate::error::{ - ConvertSqlTypeSnafu, ConvertSqlValueSnafu, GetFulltextOptionsSnafu, Result, SqlSnafu, + 
ConvertSqlTypeSnafu, ConvertSqlValueSnafu, GetFulltextOptionsSnafu, + GetSkippingIndexOptionsSnafu, Result, SqlSnafu, }; /// Generates CREATE TABLE options from given table metadata and schema-level options. @@ -115,6 +117,23 @@ fn create_column(column_schema: &ColumnSchema, quote_style: char) -> Result StatusCode::Unsupported, PermissionDenied { .. } => StatusCode::PermissionDenied, - SetFulltextOption { .. } => StatusCode::Unexpected, + SetFulltextOption { .. } | SetSkippingIndexOption { .. } => StatusCode::Unexpected, } } diff --git a/src/sql/src/parsers/create_parser.rs b/src/sql/src/parsers/create_parser.rs index bb9aadadb703..f40ecb7b6efd 100644 --- a/src/sql/src/parsers/create_parser.rs +++ b/src/sql/src/parsers/create_parser.rs @@ -36,7 +36,9 @@ use crate::error::{ SyntaxSnafu, UnexpectedSnafu, UnsupportedSnafu, }; use crate::parser::{ParserContext, FLOW}; -use crate::parsers::utils::validate_column_fulltext_create_option; +use crate::parsers::utils::{ + validate_column_fulltext_create_option, validate_column_skipping_index_create_option, +}; use crate::statements::create::{ Column, ColumnExtensions, CreateDatabase, CreateExternalTable, CreateFlow, CreateTable, CreateTableLike, CreateView, Partitions, TableConstraint, VECTOR_OPT_DIM, @@ -53,6 +55,7 @@ pub const SINK: &str = "SINK"; pub const EXPIRE: &str = "EXPIRE"; pub const AFTER: &str = "AFTER"; pub const INVERTED: &str = "INVERTED"; +pub const SKIPPING: &str = "SKIPPING"; const DB_OPT_KEY_TTL: &str = "ttl"; @@ -701,6 +704,49 @@ impl<'a> ParserContext<'a> { column_extensions.vector_options = Some(options.into()); } + let mut is_index_declared = false; + + if let Token::Word(word) = parser.peek_token().token + && word.value.eq_ignore_ascii_case(SKIPPING) + { + parser.next_token(); + // Consume `INDEX` keyword + ensure!( + parser.parse_keyword(Keyword::INDEX), + InvalidColumnOptionSnafu { + name: column_name.to_string(), + msg: "expect INDEX after SKIPPING keyword", + } + ); + ensure!( + 
column_extensions.skipping_index_options.is_none(), + InvalidColumnOptionSnafu { + name: column_name.to_string(), + msg: "duplicated SKIPPING index option", + } + ); + + let options = parser + .parse_options(Keyword::WITH) + .context(error::SyntaxSnafu)? + .into_iter() + .map(parse_option_string) + .collect::>>()?; + + for key in options.keys() { + ensure!( + validate_column_skipping_index_create_option(key), + InvalidColumnOptionSnafu { + name: column_name.to_string(), + msg: format!("invalid SKIP option: {key}"), + } + ); + } + + column_extensions.skipping_index_options = Some(options.into()); + is_index_declared |= true; + } + if parser.parse_keyword(Keyword::FULLTEXT) { ensure!( column_extensions.fulltext_options.is_none(), @@ -738,10 +784,10 @@ impl<'a> ParserContext<'a> { } column_extensions.fulltext_options = Some(options.into()); - Ok(true) - } else { - Ok(false) + is_index_declared |= true; } + + Ok(is_index_declared) } fn parse_optional_table_constraint(&mut self) -> Result> { @@ -2103,6 +2149,57 @@ CREATE TABLE log ( .contains("invalid FULLTEXT option")); } + #[test] + fn test_parse_create_table_skip_options() { + let sql = r" +CREATE TABLE log ( + ts TIMESTAMP TIME INDEX, + msg INT SKIPPING INDEX WITH (granularity='8192', type='bloom'), +)"; + let result = + ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()) + .unwrap(); + + if let Statement::CreateTable(c) = &result[0] { + c.columns.iter().for_each(|col| { + if col.name().value == "msg" { + assert!(!col + .extensions + .skipping_index_options + .as_ref() + .unwrap() + .is_empty()); + } + }); + } else { + panic!("should be create_table statement"); + } + + let sql = r" + CREATE TABLE log ( + ts TIMESTAMP TIME INDEX, + msg INT SKIPPING INDEX, + )"; + let result = + ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()) + .unwrap(); + + if let Statement::CreateTable(c) = &result[0] { + c.columns.iter().for_each(|col| { + if 
col.name().value == "msg" { + assert!(col + .extensions + .skipping_index_options + .as_ref() + .unwrap() + .is_empty()); + } + }); + } else { + panic!("should be create_table statement"); + } + } + #[test] fn test_parse_create_view_with_columns() { let sql = "CREATE VIEW test () AS SELECT * FROM NUMBERS"; diff --git a/src/sql/src/parsers/utils.rs b/src/sql/src/parsers/utils.rs index ae5146d7ee7b..f7eefc4b9562 100644 --- a/src/sql/src/parsers/utils.rs +++ b/src/sql/src/parsers/utils.rs @@ -26,7 +26,10 @@ use datafusion_expr::{AggregateUDF, ScalarUDF, TableSource, WindowUDF}; use datafusion_sql::planner::{ContextProvider, SqlToRel}; use datafusion_sql::TableReference; use datatypes::arrow::datatypes::DataType; -use datatypes::schema::{COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE}; +use datatypes::schema::{ + COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, + COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, +}; use snafu::ResultExt; use crate::error::{ @@ -119,3 +122,11 @@ pub fn validate_column_fulltext_create_option(key: &str) -> bool { ] .contains(&key) } + +pub fn validate_column_skipping_index_create_option(key: &str) -> bool { + [ + COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, + COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, + ] + .contains(&key) +} diff --git a/src/sql/src/statements.rs b/src/sql/src/statements.rs index 25cc3bf7e5be..00196ed5313b 100644 --- a/src/sql/src/statements.rs +++ b/src/sql/src/statements.rs @@ -58,7 +58,8 @@ use crate::error::{ self, ColumnTypeMismatchSnafu, ConvertSqlValueSnafu, ConvertToGrpcDataTypeSnafu, ConvertValueSnafu, DatatypeSnafu, InvalidCastSnafu, InvalidSqlValueSnafu, InvalidUnaryOpSnafu, ParseSqlValueSnafu, Result, SerializeColumnDefaultConstraintSnafu, SetFulltextOptionSnafu, - TimestampOverflowSnafu, UnsupportedDefaultValueSnafu, UnsupportedUnaryOpSnafu, + SetSkippingIndexOptionSnafu, TimestampOverflowSnafu, UnsupportedDefaultValueSnafu, + 
UnsupportedUnaryOpSnafu, }; use crate::statements::create::Column; pub use crate::statements::option_map::OptionMap; @@ -513,6 +514,12 @@ pub fn column_to_schema( .context(SetFulltextOptionSnafu)?; } + if let Some(options) = column.extensions.build_skipping_index_options()? { + column_schema = column_schema + .with_skipping_options(options) + .context(SetSkippingIndexOptionSnafu)?; + } + Ok(column_schema) } @@ -1519,6 +1526,7 @@ mod tests { .into(), ), vector_options: None, + skipping_index_options: None, }, }; diff --git a/src/sql/src/statements/create.rs b/src/sql/src/statements/create.rs index e4ea46572e5f..3ea265fb7f40 100644 --- a/src/sql/src/statements/create.rs +++ b/src/sql/src/statements/create.rs @@ -16,7 +16,7 @@ use std::collections::HashMap; use std::fmt::{Display, Formatter}; use common_catalog::consts::FILE_ENGINE; -use datatypes::schema::FulltextOptions; +use datatypes::schema::{FulltextOptions, SkippingIndexOptions}; use itertools::Itertools; use serde::Serialize; use snafu::ResultExt; @@ -24,7 +24,7 @@ use sqlparser::ast::{ColumnOptionDef, DataType, Expr, Query}; use sqlparser_derive::{Visit, VisitMut}; use crate::ast::{ColumnDef, Ident, ObjectName, Value as SqlValue}; -use crate::error::{Result, SetFulltextOptionSnafu}; +use crate::error::{Result, SetFulltextOptionSnafu, SetSkippingIndexOptionSnafu}; use crate::statements::statement::Statement; use crate::statements::OptionMap; @@ -116,6 +116,8 @@ pub struct ColumnExtensions { pub fulltext_options: Option, /// Vector options. pub vector_options: Option, + /// Skipping index options. 
+ pub skipping_index_options: Option, } impl Column { @@ -158,6 +160,15 @@ impl Display for Column { write!(f, " FULLTEXT")?; } } + + if let Some(skipping_index_options) = &self.extensions.skipping_index_options { + if !skipping_index_options.is_empty() { + let options = skipping_index_options.kv_pairs(); + write!(f, " SKIPPING INDEX WITH({})", format_list_comma!(options))?; + } else { + write!(f, " SKIPPING INDEX")?; + } + } Ok(()) } } @@ -171,6 +182,17 @@ impl ColumnExtensions { let options: HashMap = options.clone().into_map(); Ok(Some(options.try_into().context(SetFulltextOptionSnafu)?)) } + + pub fn build_skipping_index_options(&self) -> Result> { + let Some(options) = self.skipping_index_options.as_ref() else { + return Ok(None); + }; + + let options: HashMap = options.clone().into_map(); + Ok(Some( + options.try_into().context(SetSkippingIndexOptionSnafu)?, + )) + } } #[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)] diff --git a/tests/cases/standalone/common/create/create_with_skip_index.result b/tests/cases/standalone/common/create/create_with_skip_index.result new file mode 100644 index 000000000000..00dd24dc6c9a --- /dev/null +++ b/tests/cases/standalone/common/create/create_with_skip_index.result @@ -0,0 +1,33 @@ +create table + skipping_table ( + ts timestamp time index, + id string skipping index, + `name` string skipping index + with + (granularity = 8192), + ); + +Affected Rows: 0 + +show +create table + skipping_table; + ++----------------+---------------------------------------------------------------------------------+ +| Table | Create Table | ++----------------+---------------------------------------------------------------------------------+ +| skipping_table | CREATE TABLE IF NOT EXISTS "skipping_table" ( | +| | "ts" TIMESTAMP(3) NOT NULL, | +| | "id" STRING NULL SKIPPING INDEX WITH(granularity = '10240', type = 'BLOOM'), | +| | "name" STRING NULL SKIPPING INDEX WITH(granularity = '8192', type = 'BLOOM'), | +| | TIME INDEX 
("ts") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++----------------+---------------------------------------------------------------------------------+ + +drop table skipping_table; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/create/create_with_skip_index.sql b/tests/cases/standalone/common/create/create_with_skip_index.sql new file mode 100644 index 000000000000..0558936699a4 --- /dev/null +++ b/tests/cases/standalone/common/create/create_with_skip_index.sql @@ -0,0 +1,14 @@ +create table + skipping_table ( + ts timestamp time index, + id string skipping index, + `name` string skipping index + with + (granularity = 8192), + ); + +show +create table + skipping_table; + +drop table skipping_table;