From 7c76d8c49c83f09ec093acff472500770338af4e Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Sun, 28 Jan 2024 10:15:55 +0200 Subject: [PATCH 1/5] schemadiff: analyze foreign key loops Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/schemadiff/errors.go | 23 +++++++++++++ go/vt/schemadiff/schema.go | 49 ++++++++++++++++++++++++++ go/vt/schemadiff/schema_test.go | 61 ++++++++++++++++++++++++++++++--- 3 files changed, 128 insertions(+), 5 deletions(-) diff --git a/go/vt/schemadiff/errors.go b/go/vt/schemadiff/errors.go index 723240f0d02..55ff7cb6d7c 100644 --- a/go/vt/schemadiff/errors.go +++ b/go/vt/schemadiff/errors.go @@ -286,6 +286,29 @@ func (e *ForeignKeyDependencyUnresolvedError) Error() string { sqlescape.EscapeID(e.Table)) } +type ForeignKeyLoopError struct { + Table string + Loop []string +} + +func (e *ForeignKeyLoopError) Error() string { + tableIsInsideLoop := false + + escaped := make([]string, 0, len(e.Loop)) + for _, t := range e.Loop { + escaped = append(escaped, sqlescape.EscapeID(t)) + if t == e.Table { + tableIsInsideLoop = true + } + } + if tableIsInsideLoop { + return fmt.Sprintf("table %s participates in foreign key loop: %s", + sqlescape.EscapeID(e.Table), strings.Join(escaped, ", ")) + } + return fmt.Sprintf("table %s references foreign key loop: %s", + sqlescape.EscapeID(e.Table), strings.Join(escaped, ", ")) +} + type ForeignKeyNonexistentReferencedTableError struct { Table string ReferencedTable string diff --git a/go/vt/schemadiff/schema.go b/go/vt/schemadiff/schema.go index 84f12e1168a..ef88a7962eb 100644 --- a/go/vt/schemadiff/schema.go +++ b/go/vt/schemadiff/schema.go @@ -39,6 +39,7 @@ type Schema struct { foreignKeyParents []*CreateTableEntity // subset of tables foreignKeyChildren []*CreateTableEntity // subset of tables + foreignKeyLoopMap map[string][]string // map of table name that either participate, or directly or indirectly reference foreign key loops env *Environment } @@ -53,6 +54,7 @@ func newEmptySchema(env *Environment) *Schema { foreignKeyParents: []*CreateTableEntity{}, foreignKeyChildren: []*CreateTableEntity{}, + foreignKeyLoopMap: map[string][]string{}, env: env, } @@ -135,6 +137,44 @@ func getForeignKeyParentTableNames(createTable *sqlparser.CreateTable) (names [] return names } +// findForeignKeyLoop is a stateful recursive function that determines whether a given table participates in a foreign +// key loop or derives from one. It returns a list of table names that form a loop, or nil if no loop is found. +// The function updates and checks the stateful map s.foreignKeyLoopMap to avoid re-analyzing the same table twice. +func (s *Schema) findForeignKeyLoop(tableName string, seen []string) (loop []string) { + if loop := s.foreignKeyLoopMap[tableName]; loop != nil { + return loop + } + t := s.Table(tableName) + if t == nil { + return nil + } + seen = append(seen, tableName) + for i, seenTable := range seen { + if i == len(seen)-1 { + // as we've just appended the table name to the end of the slice, we should skip it. + break + } + if seenTable == tableName { + // This table alreay appears in `seen`. + // We only return the loop portion of `seen` that starts and ends with this table. + return seen[i:] + } + } + for _, referencedTableName := range getForeignKeyParentTableNames(t.CreateTable) { + if loop := s.findForeignKeyLoop(referencedTableName, seen); loop != nil { + // Found loop. Update cache. + // It's possible for one table to participate in more than one foreign key loop, but + // we suffice with one loop, since we already only ever report one foreign key error + // per table. + if _, ok := s.foreignKeyLoopMap[tableName]; !ok { + s.foreignKeyLoopMap[tableName] = loop + } + return loop + } + } + return nil +} + // getViewDependentTableNames analyzes a CREATE VIEW definition and extracts all tables/views read by this view func getViewDependentTableNames(createView *sqlparser.CreateView) (names []string) { _ = sqlparser.Walk(func(node sqlparser.SQLNode) (kontinue bool, err error) { @@ -309,7 +349,16 @@ func (s *Schema) normalize() error { } iterationLevel++ } + if len(s.sorted) != len(s.tables)+len(s.views) { + + for _, t := range s.tables { + if _, ok := dependencyLevels[t.Name()]; !ok { + if loop := s.findForeignKeyLoop(t.Name(), nil); loop != nil { + errs = errors.Join(errs, addEntityFkError(t, &ForeignKeyLoopError{Table: t.Name(), Loop: loop})) + } + } + } // We have leftover tables or views. This can happen if the schema definition is invalid: // - a table's foreign key references a nonexistent table // - two or more tables have circular FK dependency diff --git a/go/vt/schemadiff/schema_test.go b/go/vt/schemadiff/schema_test.go index 504aa56fd6a..8baefa4c4e4 100644 --- a/go/vt/schemadiff/schema_test.go +++ b/go/vt/schemadiff/schema_test.go @@ -310,8 +310,9 @@ func TestTableForeignKeyOrdering(t *testing.T) { func TestInvalidSchema(t *testing.T) { tt := []struct { - schema string - expectErr error + schema string + expectErr error + expectLoopTables int }{ { schema: "create table t11 (id int primary key, i int, key ix(i), constraint f11 foreign key (i) references t11(id) on delete restrict)", @@ -340,11 +341,60 @@ func TestInvalidSchema(t *testing.T) { expectErr: &ForeignKeyReferencesViewError{Table: "t11", ReferencedView: "v"}, }, { + // t11 self loop + schema: "create table t11 (id int primary key, i int, constraint f11 foreign key (i) references t11 (id) on delete restrict)", + }, + { + // t12<->t11 schema: "create table t11 (id int primary key, i int, constraint f11 foreign key (i) references t12 (id) on delete restrict); create table t12 (id int primary key, i int, constraint f12 foreign key (i) references t11 (id) on delete restrict)", expectErr: errors.Join( - &ForeignKeyDependencyUnresolvedError{Table: "t11"}, - &ForeignKeyDependencyUnresolvedError{Table: "t12"}, + &ForeignKeyLoopError{Table: "t11", Loop: []string{"t11", "t12", "t11"}}, + &ForeignKeyLoopError{Table: "t12", Loop: []string{"t11", "t12", "t11"}}, + ), + expectLoopTables: 2, + }, + { + // t10, t12<->t11 + schema: "create table t10(id int primary key); create table t11 (id int primary key, i int, constraint f11 foreign key (i) references t12 (id) on delete restrict); create table t12 (id int primary key, i int, constraint f12 foreign key (i) references t11 (id) on delete restrict)", + expectErr: errors.Join( + &ForeignKeyLoopError{Table: "t11", Loop: []string{"t11", "t12", "t11"}}, + &ForeignKeyLoopError{Table: "t12", Loop: []string{"t11", "t12", "t11"}}, + ), + expectLoopTables: 2, + }, + { + // t10, t12<->t11<-t13 + schema: "create table t10(id int primary key); create table t11 (id int primary key, i int, constraint f11 foreign key (i) references t12 (id) on delete restrict); create table t12 (id int primary key, i int, constraint f12 foreign key (i) references t11 (id) on delete restrict); create table t13 (id int primary key, i int, constraint f13 foreign key (i) references t11 (id) on delete restrict)", + expectErr: errors.Join( + &ForeignKeyLoopError{Table: "t11", Loop: []string{"t11", "t12", "t11"}}, + &ForeignKeyLoopError{Table: "t12", Loop: []string{"t11", "t12", "t11"}}, + &ForeignKeyLoopError{Table: "t13", Loop: []string{"t11", "t12", "t11"}}, + ), + expectLoopTables: 3, + }, + { + // t10 + // ^ + // | + //t12<->t11<-t13 + schema: "create table t10(id int primary key); create table t11 (id int primary key, i int, i10 int, constraint f11 foreign key (i) references t12 (id) on delete restrict, constraint f1110 foreign key (i10) references t10 (id) on delete restrict); create table t12 (id int primary key, i int, constraint f12 foreign key (i) references t11 (id) on delete restrict); create table t13 (id int primary key, i int, constraint f13 foreign key (i) references t11 (id) on delete restrict)", + expectErr: errors.Join( + &ForeignKeyLoopError{Table: "t11", Loop: []string{"t11", "t12", "t11"}}, + &ForeignKeyLoopError{Table: "t12", Loop: []string{"t11", "t12", "t11"}}, + &ForeignKeyLoopError{Table: "t13", Loop: []string{"t11", "t12", "t11"}}, + ), + expectLoopTables: 3, + }, + { + // t10, t12<->t11<-t13<-t14 + schema: "create table t10(id int primary key); create table t11 (id int primary key, i int, i10 int, constraint f11 foreign key (i) references t12 (id) on delete restrict, constraint f1110 foreign key (i10) references t10 (id) on delete restrict); create table t12 (id int primary key, i int, constraint f12 foreign key (i) references t11 (id) on delete restrict); create table t13 (id int primary key, i int, constraint f13 foreign key (i) references t11 (id) on delete restrict); create table t14 (id int primary key, i int, constraint f14 foreign key (i) references t13 (id) on delete restrict)", + expectErr: errors.Join( + &ForeignKeyLoopError{Table: "t11", Loop: []string{"t11", "t12", "t11"}}, + &ForeignKeyLoopError{Table: "t12", Loop: []string{"t11", "t12", "t11"}}, + &ForeignKeyLoopError{Table: "t13", Loop: []string{"t11", "t12", "t11"}}, + &ForeignKeyLoopError{Table: "t14", Loop: []string{"t11", "t12", "t11"}}, ), + expectLoopTables: 4, }, { schema: "create table t11 (id int primary key, i int, key ix(i), constraint f11 foreign key (i) references t11(id2) on delete restrict)", @@ -406,13 +456,14 @@ func TestInvalidSchema(t *testing.T) { for _, ts := range tt { t.Run(ts.schema, func(t *testing.T) { - _, err := NewSchemaFromSQL(NewTestEnv(), ts.schema) + s, err := NewSchemaFromSQL(NewTestEnv(), ts.schema) if ts.expectErr == nil { assert.NoError(t, err) } else { assert.Error(t, err) assert.EqualError(t, err, ts.expectErr.Error()) } + assert.Equal(t, ts.expectLoopTables, len(s.foreignKeyLoopMap)) }) } } From 66ee6ab50abc536c963efb328fa4a02d3027689a Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Sun, 28 Jan 2024 11:07:56 +0200 Subject: [PATCH 2/5] No need to re-check cache Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/schemadiff/schema.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/go/vt/schemadiff/schema.go b/go/vt/schemadiff/schema.go index ef88a7962eb..0ee0bfed2e0 100644 --- a/go/vt/schemadiff/schema.go +++ b/go/vt/schemadiff/schema.go @@ -166,9 +166,7 @@ func (s *Schema) findForeignKeyLoop(tableName string, seen []string) (loop []str // It's possible for one table to participate in more than one foreign key loop, but // we suffice with one loop, since we already only ever report one foreign key error // per table. - if _, ok := s.foreignKeyLoopMap[tableName]; !ok { - s.foreignKeyLoopMap[tableName] = loop - } + s.foreignKeyLoopMap[tableName] = loop return loop } } From bc019eb9f758726006854d0db1528864f97214f0 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Sun, 28 Jan 2024 11:13:04 +0200 Subject: [PATCH 3/5] ForeignKeyDependencyUnresolvedError does not need to indicate 'loop' possibility as this is already covered by ForeignKeyLoopError Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/schemadiff/errors.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/vt/schemadiff/errors.go b/go/vt/schemadiff/errors.go index 55ff7cb6d7c..a2f459684ef 100644 --- a/go/vt/schemadiff/errors.go +++ b/go/vt/schemadiff/errors.go @@ -282,7 +282,7 @@ type ForeignKeyDependencyUnresolvedError struct { } func (e *ForeignKeyDependencyUnresolvedError) Error() string { - return fmt.Sprintf("table %s has unresolved/loop foreign key dependencies", + return fmt.Sprintf("table %s has unresolved foreign key dependencies", sqlescape.EscapeID(e.Table)) } From 2bd3f626c1a81b127204b499d96d33ee97318be5 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Sun, 28 Jan 2024 11:15:01 +0200 Subject: [PATCH 4/5] more complex test scenarios Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/schemadiff/schema_test.go | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/go/vt/schemadiff/schema_test.go b/go/vt/schemadiff/schema_test.go index 8baefa4c4e4..0913b9a1165 100644 --- a/go/vt/schemadiff/schema_test.go +++ b/go/vt/schemadiff/schema_test.go @@ -495,10 +495,34 @@ func TestInvalidTableForeignKeyReference(t *testing.T) { } _, err := NewSchemaFromQueries(NewTestEnv(), fkQueries) assert.Error(t, err) - assert.ErrorContains(t, err, (&ForeignKeyDependencyUnresolvedError{Table: "t11"}).Error()) + assert.ErrorContains(t, err, (&ForeignKeyLoopError{Table: "t11", Loop: []string{"t11", "t12", "t13", "t11"}}).Error()) + assert.ErrorContains(t, err, (&ForeignKeyLoopError{Table: "t12", Loop: []string{"t11", "t12", "t13", "t11"}}).Error()) + assert.ErrorContains(t, err, (&ForeignKeyLoopError{Table: "t13", Loop: []string{"t11", "t12", "t13", "t11"}}).Error()) + } + { + fkQueries := []string{ + "create table t13 (id int primary key, i int, constraint f11 foreign key (i) references t11(id) on delete restrict)", + "create table t11 (id int primary key, i int, constraint f0 foreign key (i) references t0(id) on delete restrict)", + "create table t12 (id int primary key, i int, constraint f13 foreign key (i) references t13(id) on delete restrict)", + } + _, err := NewSchemaFromQueries(NewTestEnv(), fkQueries) + assert.Error(t, err) + assert.ErrorContains(t, err, (&ForeignKeyNonexistentReferencedTableError{Table: "t11", ReferencedTable: "t0"}).Error()) assert.ErrorContains(t, err, (&ForeignKeyDependencyUnresolvedError{Table: "t12"}).Error()) assert.ErrorContains(t, err, (&ForeignKeyDependencyUnresolvedError{Table: "t13"}).Error()) } + { + fkQueries := []string{ + "create table t13 (id int primary key, i int, constraint f11 foreign key (i) references t11(id) on delete restrict, constraint f12 foreign key (i) references t12(id) on delete restrict)", + "create table t11 (id int primary key, i int, constraint f0 foreign key (i) references t0(id) on delete restrict)", + "create table t12 (id int primary key, i int, constraint f13 foreign key (i) references t13(id) on delete restrict)", + } + _, err := NewSchemaFromQueries(NewTestEnv(), fkQueries) + assert.Error(t, err) + assert.ErrorContains(t, err, (&ForeignKeyNonexistentReferencedTableError{Table: "t11", ReferencedTable: "t0"}).Error()) + assert.ErrorContains(t, err, (&ForeignKeyLoopError{Table: "t12", Loop: []string{"t12", "t13", "t12"}}).Error()) + assert.ErrorContains(t, err, (&ForeignKeyLoopError{Table: "t13", Loop: []string{"t12", "t13", "t12"}}).Error()) + } } func TestGetEntityColumnNames(t *testing.T) { From 1a4ed10bd31d06cfd65f6b439876d51930b09cfd Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Mon, 5 Feb 2024 07:04:16 +0200 Subject: [PATCH 5/5] wording changes per review Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/schemadiff/errors.go | 10 +++++----- go/vt/schemadiff/schema.go | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/go/vt/schemadiff/errors.go b/go/vt/schemadiff/errors.go index a2f459684ef..efe2bbdef78 100644 --- a/go/vt/schemadiff/errors.go +++ b/go/vt/schemadiff/errors.go @@ -294,18 +294,18 @@ type ForeignKeyLoopError struct { func (e *ForeignKeyLoopError) Error() string { tableIsInsideLoop := false - escaped := make([]string, 0, len(e.Loop)) - for _, t := range e.Loop { - escaped = append(escaped, sqlescape.EscapeID(t)) + escaped := make([]string, len(e.Loop)) + for i, t := range e.Loop { + escaped[i] = sqlescape.EscapeID(t) if t == e.Table { tableIsInsideLoop = true } } if tableIsInsideLoop { - return fmt.Sprintf("table %s participates in foreign key loop: %s", + return fmt.Sprintf("table %s participates in a circular foreign key reference: %s", sqlescape.EscapeID(e.Table), strings.Join(escaped, ", ")) } - return fmt.Sprintf("table %s references foreign key loop: %s", + return fmt.Sprintf("table %s references a circular foreign key reference: %s", sqlescape.EscapeID(e.Table), strings.Join(escaped, ", ")) } diff --git a/go/vt/schemadiff/schema.go b/go/vt/schemadiff/schema.go index 0ee0bfed2e0..f07697e1ea7 100644 --- a/go/vt/schemadiff/schema.go +++ b/go/vt/schemadiff/schema.go @@ -156,7 +156,7 @@ func (s *Schema) findForeignKeyLoop(tableName string, seen []string) (loop []str } if seenTable == tableName { // This table alreay appears in `seen`. - // We only return the loop portion of `seen` that starts and ends with this table. + // We only return the suffix of `seen` that starts (and now ends) with this table. return seen[i:] } }