Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

schemadiff: analyze and report foreign key loops/cycles #15062

Merged
merged 5 commits into from
Feb 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 24 additions & 1 deletion go/vt/schemadiff/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -282,10 +282,33 @@ type ForeignKeyDependencyUnresolvedError struct {
}

func (e *ForeignKeyDependencyUnresolvedError) Error() string {
return fmt.Sprintf("table %s has unresolved/loop foreign key dependencies",
return fmt.Sprintf("table %s has unresolved foreign key dependencies",
sqlescape.EscapeID(e.Table))
}

type ForeignKeyLoopError struct {
Table string
Loop []string
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it useful to have for each step in the loop also the column name? To more exactly identify the specific loop?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm afraid of clutter. The foreign key might reference multiple columns. It is indeed possible that same table child will reference table parent with two different foreign keys, one of which has a cycle, the other does not. In such case it would indeed be beneficial to identify the referenced columns both on parent and child; but again this is an amount of information that is likely to create much background noise. Alternatively, we could use the foreign key name participating in the loop.
Either solution would complicate the loop detection logic only slightly; question is how useful vs. confusing would the information be.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just took a look at the code. The current approach (only report table name) is consistent with other schemadiff error reports, that suffice with table name or column name without drilling into specific details. I think this should be fine as it is for now.

}

func (e *ForeignKeyLoopError) Error() string {
tableIsInsideLoop := false

escaped := make([]string, len(e.Loop))
for i, t := range e.Loop {
escaped[i] = sqlescape.EscapeID(t)
if t == e.Table {
tableIsInsideLoop = true
}
}
if tableIsInsideLoop {
return fmt.Sprintf("table %s participates in a circular foreign key reference: %s",
sqlescape.EscapeID(e.Table), strings.Join(escaped, ", "))
}
return fmt.Sprintf("table %s references a circular foreign key reference: %s",
sqlescape.EscapeID(e.Table), strings.Join(escaped, ", "))
}

type ForeignKeyNonexistentReferencedTableError struct {
Table string
ReferencedTable string
Expand Down
47 changes: 47 additions & 0 deletions go/vt/schemadiff/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ type Schema struct {

foreignKeyParents []*CreateTableEntity // subset of tables
foreignKeyChildren []*CreateTableEntity // subset of tables
foreignKeyLoopMap map[string][]string // map of table name that either participate, or directly or indirectly reference foreign key loops

env *Environment
}
Expand All @@ -53,6 +54,7 @@ func newEmptySchema(env *Environment) *Schema {

foreignKeyParents: []*CreateTableEntity{},
foreignKeyChildren: []*CreateTableEntity{},
foreignKeyLoopMap: map[string][]string{},

env: env,
}
Expand Down Expand Up @@ -135,6 +137,42 @@ func getForeignKeyParentTableNames(createTable *sqlparser.CreateTable) (names []
return names
}

// findForeignKeyLoop is a stateful recursive function that determines whether a given table participates in a foreign
// key loop or derives from one. It returns a list of table names that form a loop, or nil if no loop is found.
// The function updates and checks the stateful map s.foreignKeyLoopMap to avoid re-analyzing the same table twice.
func (s *Schema) findForeignKeyLoop(tableName string, seen []string) (loop []string) {
if loop := s.foreignKeyLoopMap[tableName]; loop != nil {
return loop
}
t := s.Table(tableName)
if t == nil {
return nil
}
seen = append(seen, tableName)
for i, seenTable := range seen {
if i == len(seen)-1 {
// as we've just appended the table name to the end of the slice, we should skip it.
break
}
if seenTable == tableName {
// This table alreay appears in `seen`.
// We only return the suffix of `seen` that starts (and now ends) with this table.
return seen[i:]
}
}
for _, referencedTableName := range getForeignKeyParentTableNames(t.CreateTable) {
if loop := s.findForeignKeyLoop(referencedTableName, seen); loop != nil {
// Found loop. Update cache.
// It's possible for one table to participate in more than one foreign key loop, but
// we suffice with one loop, since we already only ever report one foreign key error
// per table.
s.foreignKeyLoopMap[tableName] = loop
return loop
}
}
return nil
}

// getViewDependentTableNames analyzes a CREATE VIEW definition and extracts all tables/views read by this view
func getViewDependentTableNames(createView *sqlparser.CreateView) (names []string) {
_ = sqlparser.Walk(func(node sqlparser.SQLNode) (kontinue bool, err error) {
Expand Down Expand Up @@ -309,7 +347,16 @@ func (s *Schema) normalize() error {
}
iterationLevel++
}

if len(s.sorted) != len(s.tables)+len(s.views) {

for _, t := range s.tables {
if _, ok := dependencyLevels[t.Name()]; !ok {
if loop := s.findForeignKeyLoop(t.Name(), nil); loop != nil {
errs = errors.Join(errs, addEntityFkError(t, &ForeignKeyLoopError{Table: t.Name(), Loop: loop}))
}
}
}
// We have leftover tables or views. This can happen if the schema definition is invalid:
// - a table's foreign key references a nonexistent table
// - two or more tables have circular FK dependency
Expand Down
87 changes: 81 additions & 6 deletions go/vt/schemadiff/schema_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -310,8 +310,9 @@ func TestTableForeignKeyOrdering(t *testing.T) {

func TestInvalidSchema(t *testing.T) {
tt := []struct {
schema string
expectErr error
schema string
expectErr error
expectLoopTables int
}{
{
schema: "create table t11 (id int primary key, i int, key ix(i), constraint f11 foreign key (i) references t11(id) on delete restrict)",
Expand Down Expand Up @@ -340,11 +341,60 @@ func TestInvalidSchema(t *testing.T) {
expectErr: &ForeignKeyReferencesViewError{Table: "t11", ReferencedView: "v"},
},
{
// t11 self loop
schema: "create table t11 (id int primary key, i int, constraint f11 foreign key (i) references t11 (id) on delete restrict)",
},
{
// t12<->t11
schema: "create table t11 (id int primary key, i int, constraint f11 foreign key (i) references t12 (id) on delete restrict); create table t12 (id int primary key, i int, constraint f12 foreign key (i) references t11 (id) on delete restrict)",
expectErr: errors.Join(
&ForeignKeyDependencyUnresolvedError{Table: "t11"},
&ForeignKeyDependencyUnresolvedError{Table: "t12"},
&ForeignKeyLoopError{Table: "t11", Loop: []string{"t11", "t12", "t11"}},
&ForeignKeyLoopError{Table: "t12", Loop: []string{"t11", "t12", "t11"}},
),
expectLoopTables: 2,
},
{
// t10, t12<->t11
schema: "create table t10(id int primary key); create table t11 (id int primary key, i int, constraint f11 foreign key (i) references t12 (id) on delete restrict); create table t12 (id int primary key, i int, constraint f12 foreign key (i) references t11 (id) on delete restrict)",
expectErr: errors.Join(
&ForeignKeyLoopError{Table: "t11", Loop: []string{"t11", "t12", "t11"}},
&ForeignKeyLoopError{Table: "t12", Loop: []string{"t11", "t12", "t11"}},
),
expectLoopTables: 2,
},
{
// t10, t12<->t11<-t13
schema: "create table t10(id int primary key); create table t11 (id int primary key, i int, constraint f11 foreign key (i) references t12 (id) on delete restrict); create table t12 (id int primary key, i int, constraint f12 foreign key (i) references t11 (id) on delete restrict); create table t13 (id int primary key, i int, constraint f13 foreign key (i) references t11 (id) on delete restrict)",
expectErr: errors.Join(
&ForeignKeyLoopError{Table: "t11", Loop: []string{"t11", "t12", "t11"}},
&ForeignKeyLoopError{Table: "t12", Loop: []string{"t11", "t12", "t11"}},
&ForeignKeyLoopError{Table: "t13", Loop: []string{"t11", "t12", "t11"}},
),
expectLoopTables: 3,
},
{
// t10
// ^
// |
//t12<->t11<-t13
schema: "create table t10(id int primary key); create table t11 (id int primary key, i int, i10 int, constraint f11 foreign key (i) references t12 (id) on delete restrict, constraint f1110 foreign key (i10) references t10 (id) on delete restrict); create table t12 (id int primary key, i int, constraint f12 foreign key (i) references t11 (id) on delete restrict); create table t13 (id int primary key, i int, constraint f13 foreign key (i) references t11 (id) on delete restrict)",
expectErr: errors.Join(
&ForeignKeyLoopError{Table: "t11", Loop: []string{"t11", "t12", "t11"}},
&ForeignKeyLoopError{Table: "t12", Loop: []string{"t11", "t12", "t11"}},
&ForeignKeyLoopError{Table: "t13", Loop: []string{"t11", "t12", "t11"}},
),
expectLoopTables: 3,
},
{
// t10, t12<->t11<-t13<-t14
schema: "create table t10(id int primary key); create table t11 (id int primary key, i int, i10 int, constraint f11 foreign key (i) references t12 (id) on delete restrict, constraint f1110 foreign key (i10) references t10 (id) on delete restrict); create table t12 (id int primary key, i int, constraint f12 foreign key (i) references t11 (id) on delete restrict); create table t13 (id int primary key, i int, constraint f13 foreign key (i) references t11 (id) on delete restrict); create table t14 (id int primary key, i int, constraint f14 foreign key (i) references t13 (id) on delete restrict)",
expectErr: errors.Join(
&ForeignKeyLoopError{Table: "t11", Loop: []string{"t11", "t12", "t11"}},
&ForeignKeyLoopError{Table: "t12", Loop: []string{"t11", "t12", "t11"}},
&ForeignKeyLoopError{Table: "t13", Loop: []string{"t11", "t12", "t11"}},
&ForeignKeyLoopError{Table: "t14", Loop: []string{"t11", "t12", "t11"}},
),
expectLoopTables: 4,
},
{
schema: "create table t11 (id int primary key, i int, key ix(i), constraint f11 foreign key (i) references t11(id2) on delete restrict)",
Expand Down Expand Up @@ -406,13 +456,14 @@ func TestInvalidSchema(t *testing.T) {
for _, ts := range tt {
t.Run(ts.schema, func(t *testing.T) {

_, err := NewSchemaFromSQL(NewTestEnv(), ts.schema)
s, err := NewSchemaFromSQL(NewTestEnv(), ts.schema)
if ts.expectErr == nil {
assert.NoError(t, err)
} else {
assert.Error(t, err)
assert.EqualError(t, err, ts.expectErr.Error())
}
assert.Equal(t, ts.expectLoopTables, len(s.foreignKeyLoopMap))
})
}
}
Expand Down Expand Up @@ -444,10 +495,34 @@ func TestInvalidTableForeignKeyReference(t *testing.T) {
}
_, err := NewSchemaFromQueries(NewTestEnv(), fkQueries)
assert.Error(t, err)
assert.ErrorContains(t, err, (&ForeignKeyDependencyUnresolvedError{Table: "t11"}).Error())
assert.ErrorContains(t, err, (&ForeignKeyLoopError{Table: "t11", Loop: []string{"t11", "t12", "t13", "t11"}}).Error())
assert.ErrorContains(t, err, (&ForeignKeyLoopError{Table: "t12", Loop: []string{"t11", "t12", "t13", "t11"}}).Error())
assert.ErrorContains(t, err, (&ForeignKeyLoopError{Table: "t13", Loop: []string{"t11", "t12", "t13", "t11"}}).Error())
}
{
fkQueries := []string{
"create table t13 (id int primary key, i int, constraint f11 foreign key (i) references t11(id) on delete restrict)",
"create table t11 (id int primary key, i int, constraint f0 foreign key (i) references t0(id) on delete restrict)",
"create table t12 (id int primary key, i int, constraint f13 foreign key (i) references t13(id) on delete restrict)",
}
_, err := NewSchemaFromQueries(NewTestEnv(), fkQueries)
assert.Error(t, err)
assert.ErrorContains(t, err, (&ForeignKeyNonexistentReferencedTableError{Table: "t11", ReferencedTable: "t0"}).Error())
assert.ErrorContains(t, err, (&ForeignKeyDependencyUnresolvedError{Table: "t12"}).Error())
assert.ErrorContains(t, err, (&ForeignKeyDependencyUnresolvedError{Table: "t13"}).Error())
}
{
fkQueries := []string{
"create table t13 (id int primary key, i int, constraint f11 foreign key (i) references t11(id) on delete restrict, constraint f12 foreign key (i) references t12(id) on delete restrict)",
"create table t11 (id int primary key, i int, constraint f0 foreign key (i) references t0(id) on delete restrict)",
"create table t12 (id int primary key, i int, constraint f13 foreign key (i) references t13(id) on delete restrict)",
}
_, err := NewSchemaFromQueries(NewTestEnv(), fkQueries)
assert.Error(t, err)
assert.ErrorContains(t, err, (&ForeignKeyNonexistentReferencedTableError{Table: "t11", ReferencedTable: "t0"}).Error())
assert.ErrorContains(t, err, (&ForeignKeyLoopError{Table: "t12", Loop: []string{"t12", "t13", "t12"}}).Error())
assert.ErrorContains(t, err, (&ForeignKeyLoopError{Table: "t13", Loop: []string{"t12", "t13", "t12"}}).Error())
}
}

func TestGetEntityColumnNames(t *testing.T) {
Expand Down
Loading