Skip to content

Commit

Permalink
Create prefix impl table and ability to create prefixed vector index (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
MBkkt authored Feb 14, 2025
1 parent 1ee0489 commit 8c57cab
Show file tree
Hide file tree
Showing 17 changed files with 351 additions and 102 deletions.
74 changes: 44 additions & 30 deletions ydb/core/base/table_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,22 @@ bool Contains(const auto& names, std::string_view str) {
return std::find(std::begin(names), std::end(names), str) != std::end(names);
}

constexpr std::string_view ImplTables[] = {
ImplTable, NTableVectorKmeansTreeIndex::LevelTable, NTableVectorKmeansTreeIndex::PostingTable,
// Returns true when at least one name in `columns` begins with
// SYSTEM_COLUMN_PREFIX, i.e. uses a reserved column name.
bool ContainsSystemColumn(const auto& columns) {
    return std::any_of(std::begin(columns), std::end(columns), [](const auto& name) {
        return name.StartsWith(SYSTEM_COLUMN_PREFIX);
    });
}

// Names of all index implementation tables listed by this module: the
// secondary-index impl table, the vector k-means tree level/posting/prefix
// tables, and the two posting-table variants carrying a build suffix
// (posting table name + BuildSuffix0 / BuildSuffix1).
// The elements are TString rather than std::string_view because the suffixed
// names are produced by concatenation when the array is initialised.
// NOTE(review): presumably this is the list IsImplTable() checks against —
// its body is not visible here; confirm.
const TString ImplTables[] = {
ImplTable,
NTableVectorKmeansTreeIndex::LevelTable,
NTableVectorKmeansTreeIndex::PostingTable,
NTableVectorKmeansTreeIndex::PrefixTable,
TString{NTableVectorKmeansTreeIndex::PostingTable} + NTableVectorKmeansTreeIndex::BuildSuffix0,
TString{NTableVectorKmeansTreeIndex::PostingTable} + NTableVectorKmeansTreeIndex::BuildSuffix1,
};

constexpr std::string_view GlobalSecondaryImplTables[] = {
Expand All @@ -42,18 +56,21 @@ constexpr std::string_view GlobalKMeansTreeImplTables[] = {
};
static_assert(std::is_sorted(std::begin(GlobalKMeansTreeImplTables), std::end(GlobalKMeansTreeImplTables)));

// Impl tables of a vector k-means tree index that has more than one index key
// column: level, posting and prefix tables. GetImplTables() returns this list
// for the vector index type whenever the number of index keys is not exactly one.
constexpr std::string_view PrefixedGlobalKMeansTreeImplTables[] = {
NTableVectorKmeansTreeIndex::LevelTable, NTableVectorKmeansTreeIndex::PostingTable, NTableVectorKmeansTreeIndex::PrefixTable,
};
// The list must stay in sorted order; adding a name out of order fails the build.
static_assert(std::is_sorted(std::begin(PrefixedGlobalKMeansTreeImplTables), std::end(PrefixedGlobalKMeansTreeImplTables)));

}

TTableColumns CalcTableImplDescription(NKikimrSchemeOp::EIndexType type, const TTableColumns& table, const TIndexColumns& index) {
TTableColumns result;

const bool isSecondaryIndex = type != NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree;
if (isSecondaryIndex) {
for (const auto& ik : index.KeyColumns) {
result.Keys.push_back(ik);
result.Columns.emplace(ik);
}
}
std::for_each(index.KeyColumns.begin(), index.KeyColumns.end() - (isSecondaryIndex ? 0 : 1), [&] (const auto& ik) {
result.Keys.push_back(ik);
result.Columns.emplace(ik);
});

for (const auto& tk : table.Keys) {
if (result.Columns.emplace(tk).second) {
Expand Down Expand Up @@ -112,40 +129,32 @@ bool IsCompatibleIndex(NKikimrSchemeOp::EIndexType indexType, const TTableColumn

const bool isSecondaryIndex = indexType != NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree;

if (index.KeyColumns.size() < 1) {
explain = "should be at least single index key column";
return false;
}
if (isSecondaryIndex) {
if (index.KeyColumns.size() < 1) {
explain = "should be at least single index key column";
return false;
}
if (index.KeyColumns == table.Keys) {
explain = "index keys shouldn't be table keys";
return false;
}
} else {
if (index.KeyColumns.size() != 1) {
explain = "only single key column is supported for vector index";
return false;
}

if (Contains(table.Keys, NTableVectorKmeansTreeIndex::ParentColumn)) {
explain = TStringBuilder() << "table key column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::ParentColumn;
if (ContainsSystemColumn(table.Keys)) {
explain = TStringBuilder() << "table key column shouldn't have a reserved name";
return false;
}
if (Contains(index.KeyColumns, NTableVectorKmeansTreeIndex::ParentColumn)) {
// This isn't really needed, but it will be really strange to have column with such name but different meaning
explain = TStringBuilder() << "index key column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::ParentColumn;
if (ContainsSystemColumn(index.KeyColumns)) {
explain = TStringBuilder() << "index key column shouldn't have a reserved name";
return false;
}
if (Contains(index.DataColumns, NTableVectorKmeansTreeIndex::ParentColumn)) {
explain = TStringBuilder() << "index data column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::ParentColumn;
if (ContainsSystemColumn(index.DataColumns)) {
explain = TStringBuilder() << "index data column shouldn't have a reserved name";
return false;
}
}
tmp.clear();
tmp.insert(table.Keys.begin(), table.Keys.end());
if (isSecondaryIndex) {
tmp.insert(index.KeyColumns.begin(), index.KeyColumns.end());
}
tmp.insert(index.KeyColumns.begin(), index.KeyColumns.end() - (isSecondaryIndex ? 0 : 1));
if (const auto* broken = IsContains(index.DataColumns, tmp, true)) {
explain = TStringBuilder()
<< "the same column can't be used as key and data column for one index, for example " << *broken;
Expand All @@ -154,9 +163,13 @@ bool IsCompatibleIndex(NKikimrSchemeOp::EIndexType indexType, const TTableColumn
return true;
}

std::span<const std::string_view> GetImplTables(NKikimrSchemeOp::EIndexType indexType) {
std::span<const std::string_view> GetImplTables(NKikimrSchemeOp::EIndexType indexType, std::span<const TString> indexKeys) {
if (indexType == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) {
return GlobalKMeansTreeImplTables;
if (indexKeys.size() == 1) {
return GlobalKMeansTreeImplTables;
} else {
return PrefixedGlobalKMeansTreeImplTables;
}
} else {
return GlobalSecondaryImplTables;
}
Expand All @@ -168,7 +181,8 @@ bool IsImplTable(std::string_view tableName) {

bool IsBuildImplTable(std::string_view tableName) {
// all impl tables that ends with "build" should be used only for index creation and dropped when index build is finished
return tableName.ends_with("build");
return tableName.ends_with(NTableVectorKmeansTreeIndex::BuildSuffix0)
|| tableName.ends_with(NTableVectorKmeansTreeIndex::BuildSuffix1);
}

}
9 changes: 7 additions & 2 deletions ydb/core/base/table_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@
#include <span>
#include <string_view>

namespace NKikimr::NTableIndex {
namespace NKikimr {

inline constexpr const char* SYSTEM_COLUMN_PREFIX = "__ydb_";

namespace NTableIndex {

struct TTableColumns {
THashSet<TString> Columns;
Expand All @@ -27,8 +31,9 @@ inline constexpr const char* ImplTable = "indexImplTable";
bool IsCompatibleIndex(NKikimrSchemeOp::EIndexType type, const TTableColumns& table, const TIndexColumns& index, TString& explain);
TTableColumns CalcTableImplDescription(NKikimrSchemeOp::EIndexType type, const TTableColumns& table, const TIndexColumns& index);

std::span<const std::string_view> GetImplTables(NKikimrSchemeOp::EIndexType indexType);
std::span<const std::string_view> GetImplTables(NKikimrSchemeOp::EIndexType indexType, std::span<const TString> indexKeys);
bool IsImplTable(std::string_view tableName);
bool IsBuildImplTable(std::string_view tableName);

}
}
3 changes: 3 additions & 0 deletions ydb/core/base/table_vector_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,7 @@ inline constexpr const char* PostingTable = "indexImplPostingTable";
inline constexpr const char* BuildSuffix0 = "0build";
inline constexpr const char* BuildSuffix1 = "1build";

// Prefix table
inline constexpr const char* PrefixTable = "indexImplPrefixTable";

}
12 changes: 6 additions & 6 deletions ydb/core/base/ut/table_index_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,9 @@ Y_UNIT_TEST_SUITE (TableIndex) {

UNIT_ASSERT(IsCompatibleIndex(type, Table, {{"DATA1"}, {"DATA1"}}, explain));
UNIT_ASSERT_STRINGS_EQUAL(explain, "");

UNIT_ASSERT(IsCompatibleIndex(type, Table, {{"DATA1", "DATA2"}, {}}, explain));
UNIT_ASSERT_STRINGS_EQUAL(explain, "");
}

Y_UNIT_TEST (NotCompatibleVectorIndex) {
Expand All @@ -111,26 +114,23 @@ Y_UNIT_TEST_SUITE (TableIndex) {
UNIT_ASSERT(!IsCompatibleIndex(type, Table, {{"DATA1", "DATA1"}, {}}, explain));
UNIT_ASSERT_STRINGS_EQUAL(explain, "all index key columns should be unique, for example DATA1");

UNIT_ASSERT(!IsCompatibleIndex(type, Table, {{"DATA1", "DATA2"}, {}}, explain));
UNIT_ASSERT_STRINGS_EQUAL(explain, "only single key column is supported for vector index");

UNIT_ASSERT(!IsCompatibleIndex(type, Table, {{"DATA1"}, {"PK2"}}, explain));
UNIT_ASSERT_STRINGS_EQUAL(explain, "the same column can't be used as key and data column for one index, for example PK2");

{
const TTableColumns Table2{{"PK", "DATA", NTableVectorKmeansTreeIndex::ParentColumn}, {"PK"}};

UNIT_ASSERT(!IsCompatibleIndex(type, Table2, {{NTableVectorKmeansTreeIndex::ParentColumn}, {}}, explain));
UNIT_ASSERT_STRINGS_EQUAL(explain, TStringBuilder() << "index key column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::ParentColumn);
UNIT_ASSERT_STRINGS_EQUAL(explain, TStringBuilder() << "index key column shouldn't have a reserved name");

UNIT_ASSERT(!IsCompatibleIndex(type, Table2, {{"DATA"}, {NTableVectorKmeansTreeIndex::ParentColumn}}, explain));
UNIT_ASSERT_STRINGS_EQUAL(explain, TStringBuilder() << "index data column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::ParentColumn);
UNIT_ASSERT_STRINGS_EQUAL(explain, TStringBuilder() << "index data column shouldn't have a reserved name");
}
{
const TTableColumns Table3{{"PK", "DATA", NTableVectorKmeansTreeIndex::ParentColumn}, {NTableVectorKmeansTreeIndex::ParentColumn}};

UNIT_ASSERT(!IsCompatibleIndex(type, Table3, {{"DATA"}, {}}, explain));
UNIT_ASSERT_STRINGS_EQUAL(explain, TStringBuilder() << "table key column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::ParentColumn);
UNIT_ASSERT_STRINGS_EQUAL(explain, TStringBuilder() << "table key column shouldn't have a reserved name");
}
}
}
Expand Down
20 changes: 9 additions & 11 deletions ydb/core/kqp/gateway/kqp_metadata_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -401,15 +401,14 @@ TString GetDebugString(const std::pair<NKikimr::TIndexId, TString>& id) {
return TStringBuilder() << " Path: " << id.second << " TableId: " << id.first;
}

void UpdateMetadataIfSuccess(NYql::TKikimrTableMetadataPtr& implTable, TTableMetadataResult& value) {
void UpdateMetadataIfSuccess(NYql::TKikimrTableMetadataPtr* implTable, TTableMetadataResult& value) {
YQL_ENSURE(implTable);
YQL_ENSURE(value.Success());
if (!implTable) {
implTable = std::move(value.Metadata);
return;
while (*implTable) {
YQL_ENSURE((*implTable)->Name < value.Metadata->Name);
implTable = &(*implTable)->Next;
}
YQL_ENSURE(!implTable->Next);
YQL_ENSURE(implTable->Name < value.Metadata->Name);
implTable->Next = std::move(value.Metadata);
*implTable = std::move(value.Metadata);
}

void SetError(TTableMetadataResult& externalDataSourceMetadata, const TString& error) {
Expand Down Expand Up @@ -630,7 +629,7 @@ NThreading::TFuture<TTableMetadataResult> TKqpTableMetadataLoader::LoadIndexMeta

for (size_t i = 0; i < indexesCount; i++) {
const auto& index = tableMetadata->Indexes[i];
const auto implTablePaths = NSchemeHelpers::CreateIndexTablePath(tableName, index.Type, index.Name);
const auto implTablePaths = NSchemeHelpers::CreateIndexTablePath(tableName, index);
for (const auto& implTablePath : implTablePaths) {
if (!index.SchemaVersion) {
LOG_DEBUG_S(*ActorSystem, NKikimrServices::KQP_GATEWAY, "Load index metadata without schema version check index: " << index.Name);
Expand Down Expand Up @@ -664,13 +663,12 @@ NThreading::TFuture<TTableMetadataResult> TKqpTableMetadataLoader::LoadIndexMeta
result.Metadata->ImplTables.resize(indexesCount);
auto it = children.begin();
for (size_t i = 0; i < indexesCount; i++) {
for (const auto& _ : NTableIndex::GetImplTables(NYql::TIndexDescription::ConvertIndexType(
result.Metadata->Indexes[i].Type))) {
for (const auto& _ : result.Metadata->Indexes[i].GetImplTables()) {
YQL_ENSURE(it != children.end());
auto value = it++->ExtractValue();
result.AddIssues(value.Issues());
if (loadOk && (loadOk = value.Success())) {
UpdateMetadataIfSuccess(result.Metadata->ImplTables[i], value);
UpdateMetadataIfSuccess(&result.Metadata->ImplTables[i], value);
}
}
}
Expand Down
6 changes: 3 additions & 3 deletions ydb/core/kqp/gateway/utils/scheme_helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,12 @@ bool SplitTablePath(const TString& tableName, const TString& database, std::pair
}
}

TVector<TString> CreateIndexTablePath(const TString& tableName, NYql::TIndexDescription::EType indexType, const TString& indexName) {
auto implTables = NTableIndex::GetImplTables(NYql::TIndexDescription::ConvertIndexType(indexType));
TVector<TString> CreateIndexTablePath(const TString& tableName, const NYql::TIndexDescription& index) {
const auto implTables = index.GetImplTables();
TVector<TString> paths;
paths.reserve(implTables.size());
for (const auto& implTable : implTables) {
paths.emplace_back(TStringBuilder() << tableName << "/" << indexName << "/" << implTable);
paths.emplace_back(TStringBuilder() << tableName << "/" << index.Name << "/" << implTable);
}
return paths;
}
Expand Down
2 changes: 1 addition & 1 deletion ydb/core/kqp/gateway/utils/scheme_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ bool TrySplitTablePath(const TString& path, std::pair<TString, TString>& result,
bool SplitTablePath(const TString& tableName, const TString& database, std::pair<TString, TString>& pathPair,
TString& error, bool createDir);

TVector<TString> CreateIndexTablePath(const TString& tableName, NYql::TIndexDescription::EType indexType, const TString& indexName);
TVector<TString> CreateIndexTablePath(const TString& tableName, const NYql::TIndexDescription& index);

bool SetDatabaseForLoginOperation(TString& result, bool getDomainLoginOnly, TMaybe<TString> domainName,
const TString& database);
Expand Down
4 changes: 2 additions & 2 deletions ydb/core/kqp/provider/yql_kikimr_exec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1847,7 +1847,7 @@ class TKiSinkCallableExecutionTransformer : public TAsyncCallbackTransformer<TKi
TStringBuilder() << "Unknown index name: " << indexName));
return SyncError();
}
auto indexTablePaths = NKikimr::NKqp::NSchemeHelpers::CreateIndexTablePath(table.Metadata->Name, indexIter->Type, indexName);
auto indexTablePaths = NKikimr::NKqp::NSchemeHelpers::CreateIndexTablePath(table.Metadata->Name, *indexIter);
if (indexTablePaths.size() != 1) {
ctx.AddError(
TIssue(ctx.GetPosition(indexSetting.Name().Pos()),
Expand Down Expand Up @@ -2642,7 +2642,7 @@ class TKiSinkCallableExecutionTransformer : public TAsyncCallbackTransformer<TKi
if (auto maybeAnalyze = TMaybeNode<TKiAnalyzeTable>(input)) {
if (!SessionCtx->Config().FeatureFlags.GetEnableColumnStatistics()) {
ctx.AddError(TIssue("ANALYZE command is not supported because `EnableColumnStatistics` feature flag is off"));
return SyncError();
return SyncError();
}

auto cluster = TString(maybeAnalyze.Cast().DataSink().Cluster());
Expand Down
17 changes: 13 additions & 4 deletions ydb/core/kqp/provider/yql_kikimr_gateway.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <ydb/core/external_sources/external_source_factory.h>
#include <ydb/core/kqp/query_data/kqp_query_data.h>
#include <ydb/core/kqp/query_data/kqp_prepared_query.h>
#include <ydb/core/base/table_index.h>
#include <ydb/core/protos/flat_scheme_op.pb.h>
#include <ydb/core/protos/kqp.pb.h>
#include <ydb/core/protos/kqp_stats.pb.h>
Expand Down Expand Up @@ -205,6 +206,10 @@ struct TIndexDescription {
return true;
}
}

// Impl table names for this index, selected from its type and key columns
// by NKikimr::NTableIndex::GetImplTables.
std::span<const std::string_view> GetImplTables() const {
    const auto schemeIndexType = NYql::TIndexDescription::ConvertIndexType(Type);
    return NKikimr::NTableIndex::GetImplTables(schemeIndexType, KeyColumns);
}
};

struct TColumnFamily {
Expand Down Expand Up @@ -547,11 +552,15 @@ struct TKikimrTableMetadata : public TThrRefBase {
auto it = message->GetSecondaryGlobalIndexMetadata().begin();
ImplTables.reserve(indexesCount);
for(int i = 0; i < indexesCount; ++i) {
YQL_ENSURE(it != message->GetSecondaryGlobalIndexMetadata().end());
auto& implTable = ImplTables.emplace_back(MakeIntrusive<TKikimrTableMetadata>(&*it++));
if (Indexes[i].Type == TIndexDescription::EType::GlobalSyncVectorKMeansTree) {
decltype(ImplTables)::value_type* implTable = nullptr;
for (const auto& _ : Indexes[i].GetImplTables()) {
YQL_ENSURE(it != message->GetSecondaryGlobalIndexMetadata().end());
implTable->Next = MakeIntrusive<TKikimrTableMetadata>(&*it++);
// `implTable` starts as nullptr for every index, so the null case must be the
// one that creates the head of the chain. The original condition was inverted
// and dereferenced the null pointer on the first impl table.
if (!implTable) {
    // First impl table of this index: store it in ImplTables as the chain head
    // and remember the slot that holds it.
    implTable = &ImplTables.emplace_back(MakeIntrusive<TKikimrTableMetadata>(&*it++));
} else {
    // Every further impl table is linked through the previous table's Next
    // pointer; then advance to that link so the next one is appended after it.
    (*implTable)->Next = MakeIntrusive<TKikimrTableMetadata>(&*it++);
    implTable = &(*implTable)->Next;
}
}
}
YQL_ENSURE(it == message->GetSecondaryGlobalIndexMetadata().end());
Expand Down
6 changes: 3 additions & 3 deletions ydb/core/kqp/provider/yql_kikimr_opt_build.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ struct TKiExploreTxResults {
});
YQL_ENSURE(indexIt != tableMeta->Indexes.end(), "Index not found");

const auto indexTablePaths = NKikimr::NKqp::NSchemeHelpers::CreateIndexTablePath(tableMeta->Name, indexIt->Type, indexName);
const auto indexTablePaths = NKikimr::NKqp::NSchemeHelpers::CreateIndexTablePath(tableMeta->Name, *indexIt);

THashSet<TString> indexColumns;
indexColumns.reserve(indexIt->KeyColumns.size() + indexIt->DataColumns.size());
Expand Down Expand Up @@ -187,7 +187,7 @@ struct TKiExploreTxResults {
continue;
}

const auto indexTables = NKikimr::NKqp::NSchemeHelpers::CreateIndexTablePath(tableMeta->Name, index.Type, index.Name);
const auto indexTables = NKikimr::NKqp::NSchemeHelpers::CreateIndexTablePath(tableMeta->Name, index);
YQL_ENSURE(indexTables.size() == 1, "Only index with one impl table is supported");
const auto indexTable = indexTables[0];

Expand All @@ -211,7 +211,7 @@ struct TKiExploreTxResults {
continue;
}

const auto indexTables = NKikimr::NKqp::NSchemeHelpers::CreateIndexTablePath(tableMeta->Name, index.Type, index.Name);
const auto indexTables = NKikimr::NKqp::NSchemeHelpers::CreateIndexTablePath(tableMeta->Name, index);
YQL_ENSURE(indexTables.size() == 1, "Only index with one impl table is supported");
const auto indexTable = indexTables[0];

Expand Down
Loading

0 comments on commit 8c57cab

Please sign in to comment.