Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Flowable: Use errgroup in replicateQRep #1390

Merged
merged 5 commits into from
Feb 28, 2024
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 41 additions & 32 deletions flow/activities/flowable.go
Original file line number Diff line number Diff line change
Expand Up @@ -613,9 +613,7 @@ func (a *FlowableActivity) replicateQRepPartition(ctx context.Context,
return fmt.Errorf("failed to update start time for partition: %w", err)
}

pullCtx, pullCancel := context.WithCancel(ctx)
defer pullCancel()
srcConn, err := connectors.GetQRepPullConnector(pullCtx, config.SourcePeer)
srcConn, err := connectors.GetQRepPullConnector(ctx, config.SourcePeer)
if err != nil {
a.Alerter.LogFlowError(ctx, config.FlowJobName, err)
return fmt.Errorf("failed to get qrep source connector: %w", err)
Expand All @@ -637,31 +635,41 @@ func (a *FlowableActivity) replicateQRepPartition(ctx context.Context,

var stream *model.QRecordStream
bufferSize := shared.FetchAndChannelSize
var wg sync.WaitGroup

var goroutineErr error = nil
errGroup, errCtx := errgroup.WithContext(ctx)
Amogh-Bharadwaj marked this conversation as resolved.
Show resolved Hide resolved
var rowsSynced int
if config.SourcePeer.Type == protos.DBType_POSTGRES {
stream = model.NewQRecordStream(bufferSize)
Amogh-Bharadwaj marked this conversation as resolved.
Show resolved Hide resolved
wg.Add(1)

go func() {
errGroup.Go(func() error {
pgConn := srcConn.(*connpostgres.PostgresConnector)
tmp, err := pgConn.PullQRepRecordStream(ctx, config, partition, stream)
tmp, err := pgConn.PullQRepRecordStream(errCtx, config, partition, stream)
numRecords := int64(tmp)
if err != nil {
a.Alerter.LogFlowError(ctx, config.FlowJobName, err)
logger.Error("failed to pull records", slog.Any("error", err))
goroutineErr = err
return fmt.Errorf("failed to pull records: %v", err)
Amogh-Bharadwaj marked this conversation as resolved.
Show resolved Hide resolved
} else {
err = monitoring.UpdatePullEndTimeAndRowsForPartition(ctx,
err = monitoring.UpdatePullEndTimeAndRowsForPartition(errCtx,
a.CatalogPool, runUUID, partition, numRecords)
if err != nil {
logger.Error(err.Error())
goroutineErr = err
}
}
wg.Done()
}()
return nil
})

errGroup.Go(func() error {
rowsSynced, err = dstConn.SyncQRepRecords(ctx, config, partition, stream)
if err != nil {
a.Alerter.LogFlowError(ctx, config.FlowJobName, err)
return fmt.Errorf("failed to sync records: %w", err)
}
return nil
})

err = errGroup.Wait()
if err != nil {
a.Alerter.LogFlowError(ctx, config.FlowJobName, err)
return err
}
} else {
recordBatch, err := srcConn.PullQRepRecords(ctx, config, partition)
if err != nil {
Expand All @@ -680,30 +688,31 @@ func (a *FlowableActivity) replicateQRepPartition(ctx context.Context,
a.Alerter.LogFlowError(ctx, config.FlowJobName, err)
return fmt.Errorf("failed to convert to qrecord stream: %w", err)
}
}

rowsSynced, err := dstConn.SyncQRepRecords(ctx, config, partition, stream)
if err != nil {
a.Alerter.LogFlowError(ctx, config.FlowJobName, err)
return fmt.Errorf("failed to sync records: %w", err)
}
rowsSynced, err = dstConn.SyncQRepRecords(ctx, config, partition, stream)
if err != nil {
a.Alerter.LogFlowError(ctx, config.FlowJobName, err)
return fmt.Errorf("failed to sync records: %w", err)
}

if rowsSynced == 0 {
logger.Info("no records to push for partition " + partition.PartitionId)
pullCancel()
} else {
wg.Wait()
if goroutineErr != nil {
a.Alerter.LogFlowError(ctx, config.FlowJobName, goroutineErr)
return goroutineErr
if rowsSynced == 0 {
logger.Info("no records to push for partition " + partition.PartitionId)
} else {
err = errGroup.Wait()
serprex marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
a.Alerter.LogFlowError(ctx, config.FlowJobName, err)
return err
}
}
}

logger.Info(fmt.Sprintf("pushed %d records", rowsSynced))

if rowsSynced > 0 {
err := monitoring.UpdateRowsSyncedForPartition(ctx, a.CatalogPool, rowsSynced, runUUID, partition)
if err != nil {
return err
}

logger.Info(fmt.Sprintf("pushed %d records", rowsSynced))
}

err = monitoring.UpdateEndTimeForPartition(ctx, a.CatalogPool, runUUID, partition)
Expand Down
Loading