Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use vector index when the SELECT clause has a projection. #2817

Merged
merged 6 commits into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 112 additions & 0 deletions enginetest/queries/index_query_plans.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 26 additions & 0 deletions enginetest/queries/vector_index_queries.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,17 @@ var VectorIndexQueries = []ScriptTest{
},
ExpectedIndexes: []string{"v_idx"},
},
{
// Use the index even when there's a projection involved.
Query: "select `id`+1 from vectors order by VEC_DISTANCE('[0.0,0.0]', v) limit 4",
Expected: []sql.Row{
{3},
{4},
{5},
{2},
},
ExpectedIndexes: []string{"v_idx"},
},
{
// Only queries with a limit can use a vector index.
Query: "select * from vectors order by VEC_DISTANCE('[0.0,0.0]', v)",
Expand Down Expand Up @@ -88,6 +99,21 @@ var VectorIndexQueries = []ScriptTest{
},
ExpectedIndexes: []string{},
},
{
// Modify the index after creation.
Query: "insert into vectors values (5, '[1.0,0.0]')",
},
{
Query: "select * from vectors order by VEC_DISTANCE('[0.0,0.0]', v)",
Expected: []sql.Row{
{2, types.MustJSON(`[0.0, 0.0]`)},
{5, types.MustJSON(`[1.0, 0.0]`)},
{3, types.MustJSON(`[-1.0, 1.0]`)},
{4, types.MustJSON(`[0.0, -2.0]`)},
{1, types.MustJSON(`[4.0, 3.0]`)},
},
ExpectedIndexes: []string{},
},
},
},
}
24 changes: 24 additions & 0 deletions enginetest/scriptgen/setup/scripts/comp_index_tables
Original file line number Diff line number Diff line change
Expand Up @@ -95,4 +95,28 @@ create table pref_index_t3 (v1 varchar(10), v2 varchar(10), unique index (v1(3),

exec
create table pref_index_t4 (i int primary key, v1 varchar(10), v2 varchar(10), unique index (v1(3),v2(5)));
----

exec
CREATE TABLE comp_vector_index_t0 (pk BIGINT PRIMARY KEY, v1 BIGINT, v2 JSON);
----

exec
INSERT INTO comp_vector_index_t0 VALUES (0,0,"[3,16]"),(1,2,"[65,9]"),(2,3,"[38,37]"),(3,3,"[99,99]"),(4,5,"[17,42]"),(5,6,"[6,76]"),(6,6,"[81,33]"),
(7,7,"[33,51]"),(8,7,"[37,42]"),(9,8,"[9,21]"),(10,8,"[37,90]"),(11,9,"[39,20]"),(12,9,"[71,82]"),(13,10,"[16,21]"),(14,10,"[32,46]"),(15,10,"[47,36]"),
(16,12,"[44,84]"),(17,12,"[66,40]"),(18,13,"[47,30]"),(19,13,"[56,41]"),(20,14,"[38,24]"),(21,14,"[91,1]"),(22,15,"[2,69]"),(23,16,"[40,36]"),
(24,20,"[29,93]"),(25,21,"[9,89]"),(26,21,"[42,76]"),(27,23,"[13,53]"),(28,23,"[28,68]"),(29,23,"[28,90]"),(30,23,"[30,44]"),(31,24,"[20,8]"),
(32,25,"[49,88]"),(33,26,"[15,28]"),(34,27,"[35,12]"),(35,28,"[39,84]"),(36,29,"[7,38]"),(37,29,"[21,74]"),(38,29,"[27,48]"),(39,29,"[77,46]"),
(40,31,"[47,21]"),(41,31,"[47,91]"),(42,32,"[40,76]"),(43,33,"[70,50]"),(44,34,"[27,58]"),(45,35,"[32,36]"),(46,36,"[4,36]"),(47,36,"[84,75]"),
(48,37,"[27,32]"),(49,38,"[88,68]"),(50,41,"[17,68]"),(51,41,"[77,26]"),(52,42,"[80,85]"),(53,45,"[1,57]"),(54,46,"[58,8]"),(55,49,"[26,11]"),
(56,50,"[49,20]"),(57,50,"[86,6]"),(58,54,"[13,78]"),(59,54,"[57,83]"),(60,55,"[45,46]"),(61,55,"[81,80]"),(62,56,"[0,97]"),(63,56,"[8,78]"),
(64,56,"[58,4]"),(65,56,"[66,33]"),(66,57,"[7,52]"),(67,59,"[77,53]"),(68,60,"[8,70]"),(69,61,"[11,25]"),(70,63,"[85,23]"),(71,65,"[17,9]"),
(72,66,"[46,46]"),(73,66,"[73,4]"),(74,67,"[55,27]"),(75,70,"[8,54]"),(76,70,"[58,33]"),(77,71,"[39,15]"),(78,72,"[65,64]"),(79,74,"[78,26]"),
(80,75,"[91,35]"),(81,76,"[40,52]"),(82,76,"[44,87]"),(83,81,"[32,4]"),(84,82,"[11,6]"),(85,82,"[46,32]"),(86,84,"[40,8]"),(87,84,"[93,37]"),
(88,85,"[53,50]"),(89,86,"[63,79]"),(90,87,"[22,34]"),(91,87,"[57,62]"),(92,88,"[88,42]"),(93,90,"[30,67]"),(94,91,"[15,15]"),(95,93,"[7,26]"),
(96,94,"[92,38]"),(97,95,"[89,66]"),(98,97,"[63,19]"),(99,98,"[31,21]"),(100,98,"[42,22]")
----

exec
create VECTOR INDEX v_idx on comp_vector_index_t0 (v2)
----
15 changes: 15 additions & 0 deletions enginetest/scriptgen/setup/setup_data.sg.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 10 additions & 7 deletions sql/analyzer/replace_order_by_distance.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,20 @@ import (

// replaceIdxOrderByDistance applies an IndexAccess when there is an `OrderBy` with a limit over a distance expression on a column with a vector index
func replaceIdxOrderByDistance(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.Scope, sel RuleSelector, qFlags *sql.QueryFlags) (sql.Node, transform.TreeIdentity, error) {
return replaceIdxOrderByDistanceHelper(ctx, scope, n, nil)
return replaceIdxOrderByDistanceHelper(ctx, scope, n, nil, nil)
}

func replaceIdxOrderByDistanceHelper(ctx *sql.Context, scope *plan.Scope, node sql.Node, sortNode *plan.TopN) (sql.Node, transform.TreeIdentity, error) {
func replaceIdxOrderByDistanceHelper(ctx *sql.Context, scope *plan.Scope, node sql.Node, sortNode plan.Sortable, limit sql.Expression) (sql.Node, transform.TreeIdentity, error) {
switch n := node.(type) {
case *plan.TopN:
sortNode = n // lowest parent sort node
limit = n.Limit
case plan.Sortable:
sortNode = n
case *plan.Limit:
limit = n.Limit
case *plan.ResolvedTable:
if sortNode == nil {
if sortNode == nil || limit == nil {
return n, transform.SameTree, nil
}

Expand All @@ -44,7 +49,7 @@ func replaceIdxOrderByDistanceHelper(ctx *sql.Context, scope *plan.Scope, node s

// Column references have not been assigned their final indexes yet, so do that for the ORDER BY expression now.
// We can safely do this because an expression that references other tables won't pass `isSortFieldsValidPrefix` below.
sortNode = offsetAssignIndexes(sortNode).(*plan.TopN)
sortNode = offsetAssignIndexes(sortNode).(plan.Sortable)

sfExprs := normalizeExpressions(tableAliases, sortNode.GetSortFields().ToExpressions()...)
sfAliases := aliasedExpressionsInNode(sortNode)
Expand Down Expand Up @@ -100,8 +105,6 @@ func replaceIdxOrderByDistanceHelper(ctx *sql.Context, scope *plan.Scope, node s
return n, transform.SameTree, nil
}

limit := sortNode.Limit

lookup := sql.IndexLookup{
Index: idx,
Ranges: sql.MySQLRangeCollection{},
Expand All @@ -125,7 +128,7 @@ func replaceIdxOrderByDistanceHelper(ctx *sql.Context, scope *plan.Scope, node s
same := transform.SameTree
switch c := child.(type) {
case *plan.Project, *plan.TableAlias, *plan.ResolvedTable, *plan.Filter, *plan.Limit, *plan.TopN, *plan.Offset, *plan.Sort, *plan.IndexedTableAccess:
newChildren[i], same, err = replaceIdxOrderByDistanceHelper(ctx, scope, child, sortNode)
newChildren[i], same, err = replaceIdxOrderByDistanceHelper(ctx, scope, child, sortNode, limit)
default:
newChildren[i] = c
}
Expand Down
3 changes: 1 addition & 2 deletions sql/analyzer/vector_index_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ func TestVectorIndex(t *testing.T) {

for _, testCase := range vectorIndexTestCases(t, db, vectorIndexTable) {
t.Run(testCase.name, func(t *testing.T) {
res, same, err := replaceIdxOrderByDistanceHelper(nil, nil, testCase.inputPlan, nil)
res, same, err := replaceIdxOrderByDistanceHelper(nil, nil, testCase.inputPlan, nil, nil)
require.NoError(t, err)
require.Equal(t, testCase.usesVectorIndex, !bool(same))
res = offsetAssignIndexes(res)
Expand Down Expand Up @@ -218,7 +218,6 @@ func (i vectorIndexTable) SkipIndexCosting() bool {
}

// IndexWithPrefix is an unimplemented stub on the vectorIndexTable test type:
// it always panics with "implement me" and never returns an index or an error.
func (i vectorIndexTable) IndexWithPrefix(ctx *sql.Context, expressions []string) (sql.Index, error) {
//TODO implement me
panic("implement me")
}

Expand Down
Loading