-
Notifications
You must be signed in to change notification settings - Fork 2.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
schemadiff
: analyze and report foreign key loops/cycles
#15062
Changes from 4 commits
7c76d8c
66ee6ab
bc019eb
2bd3f62
1a4ed10
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -282,10 +282,33 @@ type ForeignKeyDependencyUnresolvedError struct { | |
} | ||
|
||
func (e *ForeignKeyDependencyUnresolvedError) Error() string { | ||
return fmt.Sprintf("table %s has unresolved/loop foreign key dependencies", | ||
return fmt.Sprintf("table %s has unresolved foreign key dependencies", | ||
sqlescape.EscapeID(e.Table)) | ||
} | ||
|
||
type ForeignKeyLoopError struct { | ||
Table string | ||
Loop []string | ||
} | ||
|
||
func (e *ForeignKeyLoopError) Error() string { | ||
tableIsInsideLoop := false | ||
|
||
escaped := make([]string, 0, len(e.Loop)) | ||
for _, t := range e.Loop { | ||
escaped = append(escaped, sqlescape.EscapeID(t)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nitty, but I think this is slightly more performant:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Applied. |
||
if t == e.Table { | ||
tableIsInsideLoop = true | ||
} | ||
} | ||
if tableIsInsideLoop { | ||
return fmt.Sprintf("table %s participates in foreign key loop: %s", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think "infinite loop" or "circular foreign key references" might be more descriptive for user facing error messages and docs, etc. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fixed |
||
sqlescape.EscapeID(e.Table), strings.Join(escaped, ", ")) | ||
} | ||
return fmt.Sprintf("table %s references foreign key loop: %s", | ||
sqlescape.EscapeID(e.Table), strings.Join(escaped, ", ")) | ||
} | ||
|
||
type ForeignKeyNonexistentReferencedTableError struct { | ||
Table string | ||
ReferencedTable string | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -39,6 +39,7 @@ type Schema struct { | |
|
||
foreignKeyParents []*CreateTableEntity // subset of tables | ||
foreignKeyChildren []*CreateTableEntity // subset of tables | ||
foreignKeyLoopMap map[string][]string // map of table name that either participate, or directly or indirectly reference foreign key loops | ||
|
||
env *Environment | ||
} | ||
|
@@ -53,6 +54,7 @@ func newEmptySchema(env *Environment) *Schema { | |
|
||
foreignKeyParents: []*CreateTableEntity{}, | ||
foreignKeyChildren: []*CreateTableEntity{}, | ||
foreignKeyLoopMap: map[string][]string{}, | ||
|
||
env: env, | ||
} | ||
|
@@ -135,6 +137,42 @@ func getForeignKeyParentTableNames(createTable *sqlparser.CreateTable) (names [] | |
return names | ||
} | ||
|
||
// findForeignKeyLoop is a stateful recursive function that determines whether a given table participates in a foreign | ||
// key loop or derives from one. It returns a list of table names that form a loop, or nil if no loop is found. | ||
// The function updates and checks the stateful map s.foreignKeyLoopMap to avoid re-analyzing the same table twice. | ||
func (s *Schema) findForeignKeyLoop(tableName string, seen []string) (loop []string) { | ||
if loop := s.foreignKeyLoopMap[tableName]; loop != nil { | ||
return loop | ||
} | ||
t := s.Table(tableName) | ||
if t == nil { | ||
return nil | ||
} | ||
seen = append(seen, tableName) | ||
for i, seenTable := range seen { | ||
if i == len(seen)-1 { | ||
// as we've just appended the table name to the end of the slice, we should skip it. | ||
break | ||
} | ||
if seenTable == tableName { | ||
// This table alreay appears in `seen`. | ||
// We only return the loop portion of `seen` that starts and ends with this table. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this comment is a little off, no? Maybe ~ There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Reworded as:
|
||
return seen[i:] | ||
} | ||
} | ||
for _, referencedTableName := range getForeignKeyParentTableNames(t.CreateTable) { | ||
if loop := s.findForeignKeyLoop(referencedTableName, seen); loop != nil { | ||
// Found loop. Update cache. | ||
// It's possible for one table to participate in more than one foreign key loop, but | ||
// we suffice with one loop, since we already only ever report one foreign key error | ||
// per table. | ||
s.foreignKeyLoopMap[tableName] = loop | ||
return loop | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
// getViewDependentTableNames analyzes a CREATE VIEW definition and extracts all tables/views read by this view | ||
func getViewDependentTableNames(createView *sqlparser.CreateView) (names []string) { | ||
_ = sqlparser.Walk(func(node sqlparser.SQLNode) (kontinue bool, err error) { | ||
|
@@ -309,7 +347,16 @@ func (s *Schema) normalize() error { | |
} | ||
iterationLevel++ | ||
} | ||
|
||
if len(s.sorted) != len(s.tables)+len(s.views) { | ||
|
||
for _, t := range s.tables { | ||
if _, ok := dependencyLevels[t.Name()]; !ok { | ||
if loop := s.findForeignKeyLoop(t.Name(), nil); loop != nil { | ||
errs = errors.Join(errs, addEntityFkError(t, &ForeignKeyLoopError{Table: t.Name(), Loop: loop})) | ||
} | ||
} | ||
} | ||
// We have leftover tables or views. This can happen if the schema definition is invalid: | ||
// - a table's foreign key references a nonexistent table | ||
// - two or more tables have circular FK dependency | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it useful to have for each step in the loop also the column name? To more exactly identify the specific loop?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm afraid of clutter. The foreign key might reference multiple columns. It is indeed possible that same table
child
will reference tableparent
with two different foreign keys, one of which has a cycle, the other does not. In such case it would indeed be beneficial to identify the referenced columns both on parent and child; but again this is an amount of information that is likely to create much background noise. Alternatively, we could use the foreign key name participating in the loop.Either solution would complicate the loop detection logic only slightly; question is how useful vs. confusing would the information be.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just took a look at the code. The current approach (only report table name) is consistent with other
schemadiff
error reports, that suffice with table name or column name without drilling into specific details. I think this should be fine as it is for now.