Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BQ: avoid generating SQL while holding metadata lock #879

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 17 additions & 16 deletions flow/connectors/bigquery/bigquery.go
Original file line number Diff line number Diff line change
Expand Up @@ -782,23 +782,11 @@ func (c *BigQueryConnector) NormalizeRecords(req *model.NormalizeRecordsRequest)
return nil, fmt.Errorf("couldn't get tablename to unchanged cols mapping: %w", err)
}

stmts := []string{}
// append all the statements to one list
c.logger.Info(fmt.Sprintf("merge raw records to corresponding tables: %s %s %v",
c.datasetID, rawTableName, distinctTableNames))

release, err := c.grabJobsUpdateLock()
if err != nil {
return nil, fmt.Errorf("failed to grab lock: %v", err)
}

defer func() {
err := release()
if err != nil {
c.logger.Error("failed to release lock", slog.Any("error", err))
}
}()

stmts := make([]string, 0, len(distinctTableNames)*3+3)
stmts = append(stmts, "BEGIN TRANSACTION;")

for _, tableName := range distinctTableNames {
Expand All @@ -817,21 +805,34 @@ func (c *BigQueryConnector) NormalizeRecords(req *model.NormalizeRecordsRequest)
},
}
// normalize everything between the last normalized batch id and the last sync batch id
mergeStmts := mergeGen.generateMergeStmts()
stmts = append(stmts, mergeStmts...)
createTemp, mergeStmt, dropTemp := mergeGen.generateMergeStmts()
stmts = append(stmts, createTemp, mergeStmt, dropTemp)
}
// update metadata so that the last normalized batch id is set to the most recent sync batch id.
updateMetadataStmt := fmt.Sprintf(
"UPDATE %s.%s SET normalize_batch_id=%d WHERE mirror_job_name = '%s';",
c.datasetID, MirrorJobsTable, syncBatchID, req.FlowJobName)
stmts = append(stmts, updateMetadataStmt)
stmts = append(stmts, "COMMIT TRANSACTION;")
mergeQuery := strings.Join(stmts, "\n")

release, err := c.grabJobsUpdateLock()
if err != nil {
return nil, fmt.Errorf("failed to grab lock: %v", err)
}

defer func() {
err := release()
if err != nil {
c.logger.Error("failed to release lock", slog.Any("error", err))
}
}()

// put this within a transaction
// TODO - not truncating rows in staging table as of now.
// err = c.truncateTable(staging...)

_, err = c.client.Query(strings.Join(stmts, "\n")).Read(c.ctx)
_, err = c.client.Query(mergeQuery).Read(c.ctx)
if err != nil {
return nil, fmt.Errorf("failed to execute statements %s in a transaction: %v", strings.Join(stmts, "\n"), err)
}
Expand Down
6 changes: 3 additions & 3 deletions flow/connectors/bigquery/merge_statement_generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ type mergeStmtGenerator struct {
peerdbCols *protos.PeerDBColumns
}

// GenerateMergeStmt generates a merge statements.
func (m *mergeStmtGenerator) generateMergeStmts() []string {
// generateMergeStmts returns three statements, in order: create the temp table, merge, drop the temp table.
func (m *mergeStmtGenerator) generateMergeStmts() (string, string, string) {
// build the flattened and de-duplicated CTEs first
flattenedCTE := m.generateFlattenedCTE()
deDupedCTE := m.generateDeDupedCTE()
Expand All @@ -45,7 +45,7 @@ func (m *mergeStmtGenerator) generateMergeStmts() []string {

dropTempTableStmt := fmt.Sprintf("DROP TABLE %s;", tempTable)

return []string{createTempTableStmt, mergeStmt, dropTempTableStmt}
return createTempTableStmt, mergeStmt, dropTempTableStmt
}

// generateFlattenedCTE generates a flattened CTE.
Expand Down
Loading