diff --git a/Cargo.lock b/Cargo.lock index b042227a293b..0b3ece080a4d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1183,6 +1183,7 @@ version = "0.5.0" dependencies = [ "api", "arc-swap", + "arrow", "arrow-schema", "async-stream", "async-trait", diff --git a/src/catalog/Cargo.toml b/src/catalog/Cargo.toml index 79f3603f4f19..9d9f1329d9b2 100644 --- a/src/catalog/Cargo.toml +++ b/src/catalog/Cargo.toml @@ -10,6 +10,7 @@ testing = [] [dependencies] api.workspace = true arc-swap = "1.0" +arrow.workspace = true arrow-schema.workspace = true async-stream.workspace = true async-trait = "0.1" diff --git a/src/catalog/src/information_schema/key_column_usage.rs b/src/catalog/src/information_schema/key_column_usage.rs index 7952a8e7f288..e77397fa089e 100644 --- a/src/catalog/src/information_schema/key_column_usage.rs +++ b/src/catalog/src/information_schema/key_column_usage.rs @@ -37,6 +37,13 @@ use crate::error::{ use crate::information_schema::{InformationTable, Predicates}; use crate::CatalogManager; +const CONSTRAINT_SCHEMA: &str = "constraint_schema"; +const CONSTRAINT_NAME: &str = "constraint_name"; +const TABLE_SCHEMA: &str = "table_schema"; +const TABLE_NAME: &str = "table_name"; +const COLUMN_NAME: &str = "column_name"; +const ORDINAL_POSITION: &str = "ordinal_position"; + /// The virtual table implementation for `information_schema.KEY_COLUMN_USAGE`. 
pub(super) struct InformationSchemaKeyColumnUsage { schema: SchemaRef, @@ -61,24 +68,16 @@ impl InformationSchemaKeyColumnUsage { false, ), ColumnSchema::new( - "constraint_schema", - ConcreteDataType::string_datatype(), - false, - ), - ColumnSchema::new( - "constraint_name", + CONSTRAINT_SCHEMA, ConcreteDataType::string_datatype(), false, ), + ColumnSchema::new(CONSTRAINT_NAME, ConcreteDataType::string_datatype(), false), ColumnSchema::new("table_catalog", ConcreteDataType::string_datatype(), false), - ColumnSchema::new("table_schema", ConcreteDataType::string_datatype(), false), - ColumnSchema::new("table_name", ConcreteDataType::string_datatype(), false), - ColumnSchema::new("column_name", ConcreteDataType::string_datatype(), false), - ColumnSchema::new( - "ordinal_position", - ConcreteDataType::uint32_datatype(), - false, - ), + ColumnSchema::new(TABLE_SCHEMA, ConcreteDataType::string_datatype(), false), + ColumnSchema::new(TABLE_NAME, ConcreteDataType::string_datatype(), false), + ColumnSchema::new(COLUMN_NAME, ConcreteDataType::string_datatype(), false), + ColumnSchema::new(ORDINAL_POSITION, ConcreteDataType::uint32_datatype(), false), ColumnSchema::new( "position_in_unique_constraint", ConcreteDataType::uint32_datatype(), @@ -280,12 +279,12 @@ impl InformationSchemaKeyColumnUsageBuilder { ordinal_position: u32, ) { let row = [ - ("constraint_schema", &Value::from(constraint_schema)), - ("constraint_name", &Value::from(constraint_name)), - ("table_schema", &Value::from(table_schema)), - ("table_name", &Value::from(table_name)), - ("column_name", &Value::from(column_name)), - ("ordinal_position", &Value::from(ordinal_position)), + (CONSTRAINT_SCHEMA, &Value::from(constraint_schema)), + (CONSTRAINT_NAME, &Value::from(constraint_name)), + (TABLE_SCHEMA, &Value::from(table_schema)), + (TABLE_NAME, &Value::from(table_name)), + (COLUMN_NAME, &Value::from(column_name)), + (ORDINAL_POSITION, &Value::from(ordinal_position)), ]; if !predicates.eval(&row) { diff --git 
a/src/catalog/src/information_schema/predicate.rs b/src/catalog/src/information_schema/predicate.rs index 10e8aeecb3cd..ca5551672c7e 100644 --- a/src/catalog/src/information_schema/predicate.rs +++ b/src/catalog/src/information_schema/predicate.rs @@ -12,7 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. +use arrow::array::StringArray; +use arrow::compute::kernels::comparison; use common_query::logical_plan::DfExpr; +use datafusion::common::ScalarValue; +use datafusion::logical_expr::expr::Like; use datafusion::logical_expr::Operator; use datatypes::value::Value; use store_api::storage::ScanRequest; @@ -24,6 +28,7 @@ type ColumnName = String; #[derive(Clone, PartialEq, Eq, Debug)] enum Predicate { Eq(ColumnName, Value), + Like(ColumnName, String, bool), NotEq(ColumnName, Value), InList(ColumnName, Vec<Value>), And(Box<Predicate>, Box<Predicate>), @@ -46,6 +51,19 @@ impl Predicate { return Some(v == *value); } } + Predicate::Like(c, pattern, case_insenstive) => { + for (column, value) in row { + if c != column { + continue; + } + + let Value::String(bs) = value else { + continue; + }; + + return like_utf8(bs.as_utf8(), pattern, case_insenstive); + } + } Predicate::NotEq(c, v) => { for (column, value) in row { if c != column { @@ -63,17 +81,29 @@ impl Predicate { } } Predicate::And(left, right) => { - return match (left.eval(row), right.eval(row)) { + let left = left.eval(row); + + // short-circuit + if matches!(left, Some(false)) { + return Some(false); + } + + return match (left, right.eval(row)) { (Some(left), Some(right)) => Some(left && right), - (Some(false), None) => Some(false), (None, Some(false)) => Some(false), _ => None, }; } Predicate::Or(left, right) => { - return match (left.eval(row), right.eval(row)) { + let left = left.eval(row); + + // short-circuit + if matches!(left, Some(true)) { + return Some(true); + } + + return match (left, right.eval(row)) { (Some(left), Some(right)) => Some(left || right), - 
(Some(true), None) => Some(true), (None, Some(true)) => Some(true), _ => None, }; @@ -102,6 +132,30 @@ impl Predicate { Some(Predicate::Not(Box::new(p))) } + // expr LIKE pattern + DfExpr::Like(Like { + negated, + expr, + pattern, + case_insensitive, + .. + }) if is_column(&expr) && is_string_literal(&pattern) => { + // Safety: ensured by guard + let DfExpr::Column(c) = *expr else { + unreachable!(); + }; + let DfExpr::Literal(ScalarValue::Utf8(Some(pattern))) = *pattern else { + unreachable!(); + }; + + let p = Predicate::Like(c.name, pattern, case_insensitive); + + if negated { + Some(Predicate::Not(Box::new(p))) + } else { + Some(p) + } + } // left OP right DfExpr::BinaryExpr(bin) => match (*bin.left, bin.op, *bin.right) { // left == right @@ -183,6 +237,34 @@ impl Predicate { } } +/// Perform SQL left LIKE right, return `None` if fail to evaluate. +/// - `s` the target string +/// - `pattern` the pattern just like '%abc' +/// - `case_insenstive` whether to perform case-insensitive like or not. 
+fn like_utf8(s: &str, pattern: &str, case_insenstive: &bool) -> Option<bool> { + let array = StringArray::from(vec![s]); + let patterns = StringArray::new_scalar(pattern); + + let Ok(booleans) = (if *case_insenstive { + comparison::ilike(&array, &patterns) + } else { + comparison::like(&array, &patterns) + }) else { + return None; + }; + + // Safety: at least one value in result + Some(booleans.value(0)) +} + +fn is_string_literal(expr: &DfExpr) -> bool { + matches!(expr, DfExpr::Literal(ScalarValue::Utf8(Some(_)))) +} + +fn is_column(expr: &DfExpr) -> bool { + matches!(expr, DfExpr::Column(_)) +} + /// A list of predicate pub struct Predicates { predicates: Vec<Predicate>, @@ -324,6 +406,70 @@ mod tests { .is_none()); } + #[test] + fn test_predicate_like() { + // case insensitive + let expr = DfExpr::Like(Like { + negated: false, + expr: Box::new(column("a")), + pattern: Box::new(string_literal("%abc")), + case_insensitive: true, + escape_char: None, + }); + + let p = Predicate::from_expr(expr).unwrap(); + assert!( + matches!(&p, Predicate::Like(c, pattern, case_insensitive) if + c == "a" + && pattern == "%abc" + && *case_insensitive) + ); + + let match_row = [ + ("a", &Value::from("hello AbC")), + ("b", &Value::from("b value")), + ]; + let unmatch_row = [("a", &Value::from("bca")), ("b", &Value::from("b value"))]; + + assert!(p.eval(&match_row).unwrap()); + assert!(!p.eval(&unmatch_row).unwrap()); + assert!(p.eval(&[]).is_none()); + + // case sensitive + let expr = DfExpr::Like(Like { + negated: false, + expr: Box::new(column("a")), + pattern: Box::new(string_literal("%abc")), + case_insensitive: false, + escape_char: None, + }); + + let p = Predicate::from_expr(expr).unwrap(); + assert!( + matches!(&p, Predicate::Like(c, pattern, case_insensitive) if + c == "a" + && pattern == "%abc" + && !*case_insensitive) + ); + assert!(!p.eval(&match_row).unwrap()); + assert!(!p.eval(&unmatch_row).unwrap()); + assert!(p.eval(&[]).is_none()); + + // not like + let expr = DfExpr::Like(Like { 
negated: true, + expr: Box::new(column("a")), + pattern: Box::new(string_literal("%abc")), + case_insensitive: true, + escape_char: None, + }); + + let p = Predicate::from_expr(expr).unwrap(); + assert!(!p.eval(&match_row).unwrap()); + assert!(p.eval(&unmatch_row).unwrap()); + assert!(p.eval(&[]).is_none()); + } + fn column(name: &str) -> DfExpr { DfExpr::Column(Column { relation: None, @@ -435,11 +581,11 @@ mod tests { assert_eq!(2, predicates.predicates.len()); assert!( matches!(&predicates.predicates[0], Predicate::Eq(column, v) if column == "a" - && match_string_value(v, "a_value")) + && match_string_value(v, "a_value")) ); assert!( matches!(&predicates.predicates[1], Predicate::NotEq(column, v) if column == "b" - && match_string_value(v, "b_value")) + && match_string_value(v, "b_value")) ); } diff --git a/src/catalog/src/information_schema/schemata.rs b/src/catalog/src/information_schema/schemata.rs index cc1e6d79a429..eddfb142cc77 100644 --- a/src/catalog/src/information_schema/schemata.rs +++ b/src/catalog/src/information_schema/schemata.rs @@ -37,6 +37,11 @@ use crate::error::{ use crate::information_schema::{InformationTable, Predicates}; use crate::CatalogManager; +const CATALOG_NAME: &str = "catalog_name"; +const SCHEMA_NAME: &str = "schema_name"; +const DEFAULT_CHARACTER_SET_NAME: &str = "default_character_set_name"; +const DEFAULT_COLLATION_NAME: &str = "default_collation_name"; + /// The `information_schema.schemata` table implementation. 
pub(super) struct InformationSchemaSchemata { schema: SchemaRef, @@ -55,15 +60,15 @@ impl InformationSchemaSchemata { pub(crate) fn schema() -> SchemaRef { Arc::new(Schema::new(vec![ - ColumnSchema::new("catalog_name", ConcreteDataType::string_datatype(), false), - ColumnSchema::new("schema_name", ConcreteDataType::string_datatype(), false), + ColumnSchema::new(CATALOG_NAME, ConcreteDataType::string_datatype(), false), + ColumnSchema::new(SCHEMA_NAME, ConcreteDataType::string_datatype(), false), ColumnSchema::new( - "default_character_set_name", + DEFAULT_CHARACTER_SET_NAME, ConcreteDataType::string_datatype(), false, ), ColumnSchema::new( - "default_collation_name", + DEFAULT_COLLATION_NAME, ConcreteDataType::string_datatype(), false, ), @@ -172,10 +177,10 @@ impl InformationSchemaSchemataBuilder { fn add_schema(&mut self, predicates: &Predicates, catalog_name: &str, schema_name: &str) { let row = [ - ("catalog_name", &Value::from(catalog_name)), - ("schema_name", &Value::from(schema_name)), - ("default_character_set_name", &Value::from("utf8")), - ("default_collation_name", &Value::from("utf8_bin")), + (CATALOG_NAME, &Value::from(catalog_name)), + (SCHEMA_NAME, &Value::from(schema_name)), + (DEFAULT_CHARACTER_SET_NAME, &Value::from("utf8")), + (DEFAULT_COLLATION_NAME, &Value::from("utf8_bin")), ]; if !predicates.eval(&row) { diff --git a/src/catalog/src/information_schema/tables.rs b/src/catalog/src/information_schema/tables.rs index dd0a89833cd4..5320e50277f0 100644 --- a/src/catalog/src/information_schema/tables.rs +++ b/src/catalog/src/information_schema/tables.rs @@ -38,6 +38,13 @@ use crate::error::{ use crate::information_schema::{InformationTable, Predicates}; use crate::CatalogManager; +const TABLE_CATALOG: &str = "table_catalog"; +const TABLE_SCHEMA: &str = "table_schema"; +const TABLE_NAME: &str = "table_name"; +const TABLE_TYPE: &str = "table_type"; +const TABLE_ID: &str = "table_id"; +const ENGINE: &str = "engine"; + pub(super) struct 
InformationSchemaTables { schema: SchemaRef, catalog_name: String, @@ -55,12 +62,12 @@ impl InformationSchemaTables { pub(crate) fn schema() -> SchemaRef { Arc::new(Schema::new(vec![ - ColumnSchema::new("table_catalog", ConcreteDataType::string_datatype(), false), - ColumnSchema::new("table_schema", ConcreteDataType::string_datatype(), false), - ColumnSchema::new("table_name", ConcreteDataType::string_datatype(), false), - ColumnSchema::new("table_type", ConcreteDataType::string_datatype(), false), - ColumnSchema::new("table_id", ConcreteDataType::uint32_datatype(), true), - ColumnSchema::new("engine", ConcreteDataType::string_datatype(), true), + ColumnSchema::new(TABLE_CATALOG, ConcreteDataType::string_datatype(), false), + ColumnSchema::new(TABLE_SCHEMA, ConcreteDataType::string_datatype(), false), + ColumnSchema::new(TABLE_NAME, ConcreteDataType::string_datatype(), false), + ColumnSchema::new(TABLE_TYPE, ConcreteDataType::string_datatype(), false), + ColumnSchema::new(TABLE_ID, ConcreteDataType::uint32_datatype(), true), + ColumnSchema::new(ENGINE, ConcreteDataType::string_datatype(), true), ])) } @@ -204,10 +211,10 @@ impl InformationSchemaTablesBuilder { }; let row = [ - ("table_catalog", &Value::from(catalog_name)), - ("table_schema", &Value::from(schema_name)), - ("table_name", &Value::from(table_name)), - ("table_type", &Value::from(table_type)), + (TABLE_CATALOG, &Value::from(catalog_name)), + (TABLE_SCHEMA, &Value::from(schema_name)), + (TABLE_NAME, &Value::from(table_name)), + (TABLE_TYPE, &Value::from(table_type)), ]; if !predicates.eval(&row) { diff --git a/tests/cases/standalone/common/system/information_schema.result b/tests/cases/standalone/common/system/information_schema.result index 890eed008cc6..75a692b51f1e 100644 --- a/tests/cases/standalone/common/system/information_schema.result +++ b/tests/cases/standalone/common/system/information_schema.result @@ -336,6 +336,17 @@ order by table_name; | numbers | +------------+ +select table_name +from 
information_schema.tables +where table_schema like 'my%' +order by table_name; + ++------------+ +| table_name | ++------------+ +| foo | ++------------+ + select table_name from information_schema.tables where table_schema not in ('my_db', 'information_schema') @@ -418,6 +429,22 @@ select * from KEY_COLUMN_USAGE where CONSTRAINT_NAME != 'TIME INDEX'; | def | public | PRIMARY | def | public | numbers | number | 1 | | | | | +--------------------+-------------------+-----------------+---------------+--------------+------------+-------------+------------------+-------------------------------+-------------------------+-----------------------+------------------------+ +select * from KEY_COLUMN_USAGE where CONSTRAINT_NAME LIKE '%INDEX'; + ++--------------------+-------------------+-----------------+---------------+--------------+------------+-------------+------------------+-------------------------------+-------------------------+-----------------------+------------------------+ +| constraint_catalog | constraint_schema | constraint_name | table_catalog | table_schema | table_name | column_name | ordinal_position | position_in_unique_constraint | referenced_table_schema | referenced_table_name | referenced_column_name | ++--------------------+-------------------+-----------------+---------------+--------------+------------+-------------+------------------+-------------------------------+-------------------------+-----------------------+------------------------+ +| def | my_db | TIME INDEX | def | my_db | foo | ts | 1 | | | | | ++--------------------+-------------------+-----------------+---------------+--------------+------------+-------------+------------------+-------------------------------+-------------------------+-----------------------+------------------------+ + +select * from KEY_COLUMN_USAGE where CONSTRAINT_NAME NOT LIKE '%INDEX'; + 
++--------------------+-------------------+-----------------+---------------+--------------+------------+-------------+------------------+-------------------------------+-------------------------+-----------------------+------------------------+ +| constraint_catalog | constraint_schema | constraint_name | table_catalog | table_schema | table_name | column_name | ordinal_position | position_in_unique_constraint | referenced_table_schema | referenced_table_name | referenced_column_name | ++--------------------+-------------------+-----------------+---------------+--------------+------------+-------------+------------------+-------------------------------+-------------------------+-----------------------+------------------------+ +| def | public | PRIMARY | def | public | numbers | number | 1 | | | | | ++--------------------+-------------------+-----------------+---------------+--------------+------------+-------------+------------------+-------------------------------+-------------------------+-----------------------+------------------------+ + select * from KEY_COLUMN_USAGE where CONSTRAINT_NAME == 'TIME INDEX' AND CONSTRAINT_SCHEMA != 'my_db'; ++ diff --git a/tests/cases/standalone/common/system/information_schema.sql b/tests/cases/standalone/common/system/information_schema.sql index c741a33d5252..0ba3508aca47 100644 --- a/tests/cases/standalone/common/system/information_schema.sql +++ b/tests/cases/standalone/common/system/information_schema.sql @@ -29,6 +29,11 @@ from information_schema.tables where table_schema in ('my_db', 'public') order by table_name; +select table_name +from information_schema.tables +where table_schema like 'my%' +order by table_name; + select table_name from information_schema.tables where table_schema not in ('my_db', 'information_schema') @@ -68,6 +73,10 @@ select * from KEY_COLUMN_USAGE where CONSTRAINT_NAME = 'TIME INDEX'; select * from KEY_COLUMN_USAGE where CONSTRAINT_NAME != 'TIME INDEX'; +select * from KEY_COLUMN_USAGE where 
CONSTRAINT_NAME LIKE '%INDEX'; + +select * from KEY_COLUMN_USAGE where CONSTRAINT_NAME NOT LIKE '%INDEX'; + select * from KEY_COLUMN_USAGE where CONSTRAINT_NAME == 'TIME INDEX' AND CONSTRAINT_SCHEMA != 'my_db'; -- schemata --