Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix match query for integer fields #1037

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions quesma/model/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ const (
TimestampFieldName = "@timestamp"

DateHourFunction = "__quesma_date_hour"
MatchOperator = "__quesma_match"
)
2 changes: 1 addition & 1 deletion quesma/model/expr_string_renderer.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ func (v *renderer) VisitInfix(e InfixExpr) interface{} {
}
// This might look like a strange heuristics to but is aligned with the way we are currently generating the statement
// I think in the future every infix op should be in braces.
if e.Op == "AND" || e.Op == "OR" {
if strings.HasPrefix(e.Op, "_") || e.Op == "AND" || e.Op == "OR" {
return fmt.Sprintf("(%v %v %v)", lhs, e.Op, rhs)
} else if strings.Contains(e.Op, "LIKE") || e.Op == "IS" || e.Op == "IN" || e.Op == "REGEXP" {
return fmt.Sprintf("%v %v %v", lhs, e.Op, rhs)
Expand Down
4 changes: 2 additions & 2 deletions quesma/model/highlighter.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,13 @@ func (h *Highlighter) SetTokensToHighlight(selectCmd SelectCommand) {

visitor.OverrideVisitInfix = func(b *BaseExprVisitor, e InfixExpr) interface{} {
switch e.Op {
case "iLIKE", "LIKE", "IN", "=":
case "iLIKE", "LIKE", "IN", "=", MatchOperator:
lhs, isColumnRef := e.Left.(ColumnRef)
rhs, isLiteral := e.Right.(LiteralExpr)
if isLiteral && isColumnRef { // we only highlight in this case
switch literalAsString := rhs.Value.(type) {
case string:
literalAsString = strings.TrimPrefix(literalAsString, "'%")
literalAsString = strings.TrimPrefix(literalAsString, "'")
literalAsString = strings.TrimPrefix(literalAsString, "%")
literalAsString = strings.TrimSuffix(literalAsString, "'")
literalAsString = strings.TrimSuffix(literalAsString, "%")
Expand Down
2 changes: 1 addition & 1 deletion quesma/queryparser/query_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -588,7 +588,7 @@ func (cw *ClickhouseQueryTranslator) parseMatch(queryMap QueryMap, matchPhrase b
computedIdMatchingQuery := cw.parseIds(QueryMap{"values": []interface{}{subQuery}})
statements = append(statements, computedIdMatchingQuery.WhereClause)
} else {
simpleStat := model.NewInfixExpr(model.NewColumnRef(fieldName), "iLIKE", model.NewLiteral("'%"+subQuery+"%'"))
simpleStat := model.NewInfixExpr(model.NewColumnRef(fieldName), model.MatchOperator, model.NewLiteral("'"+subQuery+"'"))
statements = append(statements, simpleStat)
}
}
Expand Down
45 changes: 45 additions & 0 deletions quesma/quesma/schema_transformer.go
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,7 @@ func (s *SchemaCheckPass) Transform(queries []*model.Query) ([]*model.Query, err
{TransformationName: "GeoTransformation", Transformation: s.applyGeoTransformations},
{TransformationName: "ArrayTransformation", Transformation: s.applyArrayTransformations},
{TransformationName: "MapTransformation", Transformation: s.applyMapTransformations},
{TransformationName: "MatchOperatorTransformation", Transformation: s.applyMatchOperator},

// Section 4: compensations and checks
{TransformationName: "BooleanLiteralTransformation", Transformation: s.applyBooleanLiteralLowering},
Expand Down Expand Up @@ -789,3 +790,47 @@ func (s *SchemaCheckPass) Transform(queries []*model.Query) ([]*model.Query, err
}
return queries, nil
}

func (s *SchemaCheckPass) applyMatchOperator(indexSchema schema.Schema, query *model.Query) (*model.Query, error) {

visitor := model.NewBaseVisitor()

var err error

visitor.OverrideVisitInfix = func(b *model.BaseExprVisitor, e model.InfixExpr) interface{} {
lhs, ok := e.Left.(model.ColumnRef)
rhs, ok2 := e.Right.(model.LiteralExpr)

if ok && ok2 && e.Op == model.MatchOperator {
field, found := indexSchema.ResolveFieldByInternalName(lhs.ColumnName)
if !found {
logger.Error().Msgf("Field %s not found in schema for table %s, should never happen here", lhs.ColumnName, query.TableName)
}

rhsValue := rhs.Value.(string)
rhsValue = strings.TrimPrefix(rhsValue, "'")
rhsValue = strings.TrimSuffix(rhsValue, "'")

switch field.Type.String() {
case schema.QuesmaTypeInteger.Name, schema.QuesmaTypeLong.Name, schema.QuesmaTypeUnsignedLong.Name, schema.QuesmaTypeBoolean.Name:
return model.NewInfixExpr(lhs, "=", model.NewLiteral(rhsValue))
default:
return model.NewInfixExpr(lhs, "iLIKE", model.NewLiteral("'%"+rhsValue+"%'"))
}
}

return model.NewInfixExpr(e.Left.Accept(b).(model.Expr), e.Op, e.Right.Accept(b).(model.Expr))
}

expr := query.SelectCommand.Accept(visitor)

if err != nil {
return nil, err
}

if _, ok := expr.(*model.SelectCommand); ok {
query.SelectCommand = *expr.(*model.SelectCommand)
}
return query, nil

}
104 changes: 104 additions & 0 deletions quesma/quesma/schema_transformer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -979,3 +979,107 @@ func TestFullTextFields(t *testing.T) {
})
}
}

func Test_applyMatchOperator(t *testing.T) {
schemaTable := schema.Table{
Columns: map[string]schema.Column{
"message": {Name: "message", Type: "String"},
"count": {Name: "count", Type: "Int64"},
},
}

tests := []struct {
name string
query *model.Query
expected *model.Query
}{
{
name: "match operator transformation for String (ILIKE)",
query: &model.Query{
TableName: "test",
SelectCommand: model.SelectCommand{
FromClause: model.NewTableRef("test"),
Columns: []model.Expr{model.NewColumnRef("message")},
WhereClause: model.NewInfixExpr(
model.NewColumnRef("message"),
model.MatchOperator,
model.NewLiteral("'needle'"),
),
},
},
expected: &model.Query{
TableName: "test",
SelectCommand: model.SelectCommand{
FromClause: model.NewTableRef("test"),
Columns: []model.Expr{model.NewColumnRef("message")},
WhereClause: model.NewInfixExpr(
model.NewColumnRef("message"),
"iLIKE",
model.NewLiteral("'%needle%'"),
),
},
},
},
{
name: "match operator transformation for Int64 (=)",
query: &model.Query{
TableName: "test",
SelectCommand: model.SelectCommand{
FromClause: model.NewTableRef("test"),
Columns: []model.Expr{model.NewColumnRef("message")},
WhereClause: model.NewInfixExpr(
model.NewColumnRef("count"),
model.MatchOperator,
model.NewLiteral("'123'"),
),
},
},
expected: &model.Query{
TableName: "test",
SelectCommand: model.SelectCommand{
FromClause: model.NewTableRef("test"),
Columns: []model.Expr{model.NewColumnRef("message")},
WhereClause: model.NewInfixExpr(
model.NewColumnRef("count"),
"=",
model.NewLiteral("123"),
),
},
},
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
tableDiscovery :=
fixedTableProvider{tables: map[string]schema.Table{
"test": schemaTable,
}}

indexConfig := map[string]config.IndexConfiguration{
"test": {
Name: "test",
},
}

cfg := config.QuesmaConfiguration{
IndexConfig: indexConfig,
}

s := schema.NewSchemaRegistry(tableDiscovery, &cfg, clickhouse.SchemaTypeAdapter{})
transform := &SchemaCheckPass{cfg: &cfg}

indexSchema, ok := s.FindSchema("test")
if !ok {
t.Fatal("schema not found")
}

actual, err := transform.applyMatchOperator(indexSchema, tt.query)
if err != nil {
t.Fatal(err)
}

assert.Equal(t, model.AsString(tt.expected.SelectCommand), model.AsString(actual.SelectCommand))
})
}
}
18 changes: 9 additions & 9 deletions quesma/testdata/aggregation_requests.go
Original file line number Diff line number Diff line change
Expand Up @@ -1844,7 +1844,7 @@ var AggregationTests = []AggregationTestCase{
"@timestamp", 'Europe/Warsaw'))*1000) / 10800000) AS "aggr__0__1__key_0",
count(*) AS "aggr__0__1__count"
FROM __quesma_table_name
WHERE ("host.name" iLIKE '%prometheus%' AND ("@timestamp">=
WHERE (("host.name" __quesma_match 'prometheus') AND ("@timestamp">=
fromUnixTimestamp64Milli(1706891809940) AND "@timestamp"<=
fromUnixTimestamp64Milli(1707496609940)))
GROUP BY "severity" AS "aggr__0__key_0",
Expand Down Expand Up @@ -2848,8 +2848,8 @@ var AggregationTests = []AggregationTestCase{
"@timestamp") AS "metric__earliest_timestamp_col_0", maxOrNull("@timestamp")
AS "metric__latest_timestamp_col_0"
FROM ` + TableName + `
WHERE ((` + fullTextFieldName + ` iLIKE '%posei%' AND "message" iLIKE '%User logged out%') AND
"host.name" iLIKE '%poseidon%')`,
WHERE ((` + fullTextFieldName + ` iLIKE '%posei%' AND ("message" __quesma_match 'User logged out')) AND
("host.name" __quesma_match 'poseidon'))`,
},
{ // [15]
TestName: "date_histogram: regression test",
Expand Down Expand Up @@ -6195,7 +6195,7 @@ var AggregationTests = []AggregationTestCase{
"aggr__0__1__parent_count", "message" AS "aggr__0__1__key_0",
count(*) AS "aggr__0__1__count"
FROM __quesma_table_name
WHERE ("message" IS NOT NULL AND NOT ("message" iLIKE '%US%'))
WHERE ("message" IS NOT NULL AND NOT (("message" __quesma_match 'US')))
GROUP BY "host.name" AS "aggr__0__key_0", "message" AS "aggr__0__1__key_0"))
WHERE ("aggr__0__order_1_rank"<=11 AND "aggr__0__1__order_1_rank"<=4)
ORDER BY "aggr__0__order_1_rank" ASC, "aggr__0__1__order_1_rank" ASC`,
Expand Down Expand Up @@ -6311,7 +6311,7 @@ var AggregationTests = []AggregationTestCase{
"aggr__0__1__2__parent_count", "message" AS "aggr__0__1__2__key_0",
count(*) AS "aggr__0__1__2__count"
FROM __quesma_table_name
WHERE ("message" IS NOT NULL AND NOT ("message" iLIKE '%US%'))
WHERE ("message" IS NOT NULL AND NOT (("message" __quesma_match 'US')))
GROUP BY "host.name" AS "aggr__0__key_0", "message" AS "aggr__0__1__key_0",
"message" AS "aggr__0__1__2__key_0"))
WHERE (("aggr__0__order_1_rank"<=11 AND "aggr__0__1__order_1_rank"<=4) AND
Expand Down Expand Up @@ -6403,7 +6403,7 @@ var AggregationTests = []AggregationTestCase{
sum(count(*)) OVER (PARTITION BY "aggr__0__key_0") AS "aggr__0__count",
"FlightDelayMin" AS "aggr__0__1__key_0", count(*) AS "aggr__0__1__count"
FROM ` + TableName + `
WHERE ("message" IS NOT NULL AND NOT ("message" iLIKE '%US%'))
WHERE ("message" IS NOT NULL AND NOT (("message" __quesma_match 'US')))
GROUP BY "host.name" AS "aggr__0__key_0",
"FlightDelayMin" AS "aggr__0__1__key_0"))
WHERE "aggr__0__order_1_rank"<=9
Expand Down Expand Up @@ -6513,7 +6513,7 @@ var AggregationTests = []AggregationTestCase{
sum(count(*)) OVER (PARTITION BY "aggr__0__key_0") AS "aggr__0__count",
"FlightDelayMin" AS "aggr__0__1__key_0", count(*) AS "aggr__0__1__count"
FROM ` + TableName + `
WHERE ("message" IS NOT NULL AND NOT ("message" iLIKE '%US%'))
WHERE ("message" IS NOT NULL AND NOT (("message" __quesma_match 'US')))
GROUP BY "host.name" AS "aggr__0__key_0",
"FlightDelayMin" AS "aggr__0__1__key_0"))
WHERE "aggr__0__order_1_rank"<=11
Expand Down Expand Up @@ -6610,7 +6610,7 @@ var AggregationTests = []AggregationTestCase{
sum(count(*)) OVER (PARTITION BY "aggr__0__key_0") AS "aggr__0__count",
"FlightDelayMin" AS "aggr__0__1__key_0", count(*) AS "aggr__0__1__count"
FROM __quesma_table_name
WHERE ("message" IS NOT NULL AND NOT ("message" iLIKE '%US%'))
WHERE ("message" IS NOT NULL AND NOT (("message" __quesma_match 'US')))
GROUP BY "host.name" AS "aggr__0__key_0",
"FlightDelayMin" AS "aggr__0__1__key_0"))
WHERE "aggr__0__order_1_rank"<=11
Expand Down Expand Up @@ -6826,7 +6826,7 @@ var AggregationTests = []AggregationTestCase{
count(*) AS "aggr__2__count",
sumOrNull("total") AS "metric__2__1_col_0"
FROM ` + TableName + `
WHERE NOT ((("abc">=0 AND "abc"<600) OR "type" iLIKE '%def%'))
WHERE NOT ((("abc">=0 AND "abc"<600) OR ("type" __quesma_match 'def')))
GROUP BY "name" AS "aggr__2__key_0"
ORDER BY "metric__2__1_col_0" DESC, "aggr__2__key_0" ASC
LIMIT 11`,
Expand Down
2 changes: 1 addition & 1 deletion quesma/testdata/clients/clover.go
Original file line number Diff line number Diff line change
Expand Up @@ -765,7 +765,7 @@ var CloverTests = []testdata.AggregationTestCase{
"field" AS "aggr__other-filter__3__key_0",
count(*) AS "aggr__other-filter__3__count"
FROM __quesma_table_name
WHERE ("a" iLIKE '%b%' AND "c" iLIKE '%d%')
WHERE (("a" __quesma_match 'b') AND ("c" __quesma_match 'd'))
GROUP BY "field" AS "aggr__other-filter__3__key_0"
ORDER BY "aggr__other-filter__3__count" DESC,
"aggr__other-filter__3__key_0" ASC
Expand Down
18 changes: 9 additions & 9 deletions quesma/testdata/requests.go
Original file line number Diff line number Diff line change
Expand Up @@ -1131,7 +1131,7 @@ var TestsSearch = []SearchTestCase{
},
"track_total_hits": false
}`,
[]string{`"host_name" iLIKE '%prometheus%'`},
[]string{`("host_name" __quesma_match 'prometheus')`},
model.ListAllFields,
[]string{`SELECT "message" FROM ` + TableName + ` WHERE "host_name" iLIKE '%prometheus%' LIMIT 10`},
[]string{},
Expand All @@ -1148,7 +1148,7 @@ var TestsSearch = []SearchTestCase{
"size": 100,
"track_total_hits": false
}`,
[]string{`((("message" iLIKE '%this%' OR "message" iLIKE '%is%') OR "message" iLIKE '%a%') OR "message" iLIKE '%test%')`},
[]string{`(((("message" __quesma_match 'this') OR ("message" __quesma_match 'is')) OR ("message" __quesma_match 'a')) OR ("message" __quesma_match 'test'))`},
model.ListAllFields,
[]string{
`SELECT "message" FROM ` + TableName + ` WHERE ((("message" iLIKE '%this%' OR "message" iLIKE '%is%') ` +
Expand Down Expand Up @@ -1405,7 +1405,7 @@ var TestsSearch = []SearchTestCase{
},
"track_total_hits": false
}`,
[]string{`"message" iLIKE '%this is a test%'`},
[]string{`("message" __quesma_match 'this is a test')`},
model.ListAllFields,
[]string{`SELECT "message" FROM ` + TableName + ` WHERE "message" iLIKE '%this is a test%'`},
[]string{},
Expand All @@ -1423,7 +1423,7 @@ var TestsSearch = []SearchTestCase{
},
"track_total_hits": false
}`,
[]string{`"message" iLIKE '%this is a test%'`},
[]string{`("message" __quesma_match 'this is a test')`},
model.ListAllFields,
[]string{`SELECT "message" FROM ` + TableName + ` WHERE "message" iLIKE '%this is a test%'`},
[]string{},
Expand Down Expand Up @@ -1687,9 +1687,9 @@ var TestsSearch = []SearchTestCase{
"track_total_hits": true
}`,
[]string{
`(("message" iLIKE '%User logged out%' AND "host.name" iLIKE '%poseidon%') ` +
`((("message" __quesma_match 'User logged out') AND ("host.name" __quesma_match 'poseidon')) ` +
`AND ("@timestamp">=fromUnixTimestamp64Milli(1706542596491) AND "@timestamp"<=fromUnixTimestamp64Milli(1706551896491)))`,
`((("message" iLIKE '%User logged out%' AND "host.name" iLIKE '%poseidon%') ` +
`(((("message" __quesma_match 'User logged out') AND ("host.name" __quesma_match 'poseidon')) ` +
`AND ("@timestamp">=fromUnixTimestamp64Milli(1706542596491) AND "@timestamp"<=fromUnixTimestamp64Milli(1706551896491))) ` +
`AND "stream.namespace" IS NOT NULL)`,
},
Expand Down Expand Up @@ -1847,10 +1847,10 @@ var TestsSearch = []SearchTestCase{
"timeout": "1000ms"
}`,
[]string{
`((("message" iLIKE '%User logged out%' AND "host.name" iLIKE '%poseidon%') ` +
`(((("message" __quesma_match 'User logged out') AND ("host.name" __quesma_match 'poseidon')) ` +
`AND ("@timestamp">=fromUnixTimestamp64Milli(1706542596491) AND "@timestamp"<=fromUnixTimestamp64Milli(1706551896491))) ` +
`AND "namespace" IS NOT NULL)`,
`(("message" iLIKE '%User logged out%' AND "host.name" iLIKE '%poseidon%') ` +
`((("message" __quesma_match 'User logged out') AND ("host.name" __quesma_match 'poseidon')) ` +
`AND ("@timestamp">=fromUnixTimestamp64Milli(1706542596491) AND "@timestamp"<=fromUnixTimestamp64Milli(1706551896491)))`,
},
model.Normal,
Expand Down Expand Up @@ -2085,7 +2085,7 @@ var TestsSearch = []SearchTestCase{
"track_total_hits": false,
"size": 12
}`,
[]string{`("message" iLIKE '%User logged out%' AND "message" iLIKE '%User logged out%')`},
[]string{`(("message" __quesma_match 'User logged out') AND ("message" __quesma_match 'User logged out'))`},
model.ListAllFields,
[]string{
`SELECT "message" ` +
Expand Down
Loading