From de3c2c405bba16d7dc6bf44727b9882a6d081623 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Thu, 4 Jan 2024 00:10:47 +0000 Subject: [PATCH 01/67] WaitFor --- flow/e2e/postgres/peer_flow_pg_test.go | 148 ++++++++++++------------- flow/e2e/test_utils.go | 16 +++ 2 files changed, 90 insertions(+), 74 deletions(-) diff --git a/flow/e2e/postgres/peer_flow_pg_test.go b/flow/e2e/postgres/peer_flow_pg_test.go index 5ebf1e457b..0d547b793c 100644 --- a/flow/e2e/postgres/peer_flow_pg_test.go +++ b/flow/e2e/postgres/peer_flow_pg_test.go @@ -3,7 +3,9 @@ package e2e_postgres import ( "context" "fmt" + "reflect" "sync" + "time" "github.com/PeerDB-io/peer-flow/e2e" "github.com/PeerDB-io/peer-flow/generated/protos" @@ -104,6 +106,28 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Flow_PG() { env.AssertExpectations(s.t) } +func WaitFuncSchema( + s PeerFlowE2ETestSuitePG, + srcTableName string, + dstTableName string, + cols string, + expectedTableSchema *protos.TableSchema, +) func(context.Context) bool { + return func(ctx context.Context) bool { + output, err := s.connector.GetTableSchema(&protos.GetTableSchemaBatchInput{ + TableIdentifiers: []string{dstTableName}, + }) + if err != nil { + return false + } + tableSchema := output.TableNameSchemaMapping[dstTableName] + if !reflect.DeepEqual(expectedTableSchema, tableSchema) { + return false + } + return s.comparePGTables(srcTableName, dstTableName, cols) == nil + } +} + func (s PeerFlowE2ETestSuitePG) Test_Simple_Schema_Changes_PG() { env := e2e.NewTemporalTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(s.t, env) @@ -136,7 +160,10 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Schema_Changes_PG() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert and mutate schema repeatedly. + wg := sync.WaitGroup{} + wg.Add(1) go func() { + defer wg.Done() // insert first row. 
e2e.SetupCDCFlowStatusQuery(env, connectionGen) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` @@ -144,21 +171,14 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Schema_Changes_PG() { e2e.EnvNoError(s.t, env, err) s.t.Log("Inserted initial row in the source table") - // verify we got our first row. - e2e.NormalizeFlowCountQuery(env, connectionGen, 2) - expectedTableSchema := &protos.TableSchema{ - TableIdentifier: dstTableName, - ColumnNames: []string{"id", "c1"}, - ColumnTypes: []string{string(qvalue.QValueKindInt64), string(qvalue.QValueKindInt64)}, - PrimaryKeyColumns: []string{"id"}, - } - output, err := s.connector.GetTableSchema(&protos.GetTableSchemaBatchInput{ - TableIdentifiers: []string{dstTableName}, - }) - e2e.EnvNoError(s.t, env, err) - require.Equal(s.t, expectedTableSchema, output.TableNameSchemaMapping[dstTableName]) - err = s.comparePGTables(srcTableName, dstTableName, "id,c1") - e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitFor(s.t, env, time.Minute, "normalizing first row", + WaitFuncSchema(s, srcTableName, dstTableName, "id,c1", &protos.TableSchema{ + TableIdentifier: dstTableName, + ColumnNames: []string{"id", "c1"}, + ColumnTypes: []string{string(qvalue.QValueKindInt64), string(qvalue.QValueKindInt64)}, + PrimaryKeyColumns: []string{"id"}, + }), + ) // alter source table, add column c2 and insert another row. _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` @@ -170,25 +190,18 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Schema_Changes_PG() { e2e.EnvNoError(s.t, env, err) s.t.Log("Inserted row with added c2 in the source table") - // verify we got our two rows, if schema did not match up it will error. 
- e2e.NormalizeFlowCountQuery(env, connectionGen, 4) - expectedTableSchema = &protos.TableSchema{ - TableIdentifier: dstTableName, - ColumnNames: []string{"id", "c1", "c2"}, - ColumnTypes: []string{ - string(qvalue.QValueKindInt64), - string(qvalue.QValueKindInt64), - string(qvalue.QValueKindInt64), - }, - PrimaryKeyColumns: []string{"id"}, - } - output, err = s.connector.GetTableSchema(&protos.GetTableSchemaBatchInput{ - TableIdentifiers: []string{dstTableName}, - }) - e2e.EnvNoError(s.t, env, err) - require.Equal(s.t, expectedTableSchema, output.TableNameSchemaMapping[dstTableName]) - err = s.comparePGTables(srcTableName, dstTableName, "id,c1,c2") - e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitFor(s.t, env, time.Minute, "normalizing altered row", + WaitFuncSchema(s, srcTableName, dstTableName, "id,c1,c2", &protos.TableSchema{ + TableIdentifier: dstTableName, + ColumnNames: []string{"id", "c1", "c2"}, + ColumnTypes: []string{ + string(qvalue.QValueKindInt64), + string(qvalue.QValueKindInt64), + string(qvalue.QValueKindInt64), + }, + PrimaryKeyColumns: []string{"id"}, + }), + ) // alter source table, add column c3, drop column c2 and insert another row. _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` @@ -200,26 +213,19 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Schema_Changes_PG() { e2e.EnvNoError(s.t, env, err) s.t.Log("Inserted row with added c3 in the source table") - // verify we got our two rows, if schema did not match up it will error. 
- e2e.NormalizeFlowCountQuery(env, connectionGen, 6) - expectedTableSchema = &protos.TableSchema{ - TableIdentifier: dstTableName, - ColumnNames: []string{"id", "c1", "c2", "c3"}, - ColumnTypes: []string{ - string(qvalue.QValueKindInt64), - string(qvalue.QValueKindInt64), - string(qvalue.QValueKindInt64), - string(qvalue.QValueKindInt64), - }, - PrimaryKeyColumns: []string{"id"}, - } - output, err = s.connector.GetTableSchema(&protos.GetTableSchemaBatchInput{ - TableIdentifiers: []string{dstTableName}, - }) - e2e.EnvNoError(s.t, env, err) - require.Equal(s.t, expectedTableSchema, output.TableNameSchemaMapping[dstTableName]) - err = s.comparePGTables(srcTableName, dstTableName, "id,c1,c3") - e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitFor(s.t, env, time.Minute, "normalizing dropped column row", + WaitFuncSchema(s, srcTableName, dstTableName, "id,c1,c3", &protos.TableSchema{ + TableIdentifier: dstTableName, + ColumnNames: []string{"id", "c1", "c2", "c3"}, + ColumnTypes: []string{ + string(qvalue.QValueKindInt64), + string(qvalue.QValueKindInt64), + string(qvalue.QValueKindInt64), + string(qvalue.QValueKindInt64), + }, + PrimaryKeyColumns: []string{"id"}, + }), + ) // alter source table, drop column c3 and insert another row. _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` @@ -231,29 +237,23 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Schema_Changes_PG() { e2e.EnvNoError(s.t, env, err) s.t.Log("Inserted row after dropping all columns in the source table") - // verify we got our two rows, if schema did not match up it will error. 
- e2e.NormalizeFlowCountQuery(env, connectionGen, 8) - expectedTableSchema = &protos.TableSchema{ - TableIdentifier: dstTableName, - ColumnNames: []string{"id", "c1", "c2", "c3"}, - ColumnTypes: []string{ - string(qvalue.QValueKindInt64), - string(qvalue.QValueKindInt64), - string(qvalue.QValueKindInt64), - string(qvalue.QValueKindInt64), - }, - PrimaryKeyColumns: []string{"id"}, - } - output, err = s.connector.GetTableSchema(&protos.GetTableSchemaBatchInput{ - TableIdentifiers: []string{dstTableName}, - }) - e2e.EnvNoError(s.t, env, err) - require.Equal(s.t, expectedTableSchema, output.TableNameSchemaMapping[dstTableName]) - err = s.comparePGTables(srcTableName, dstTableName, "id,c1") - e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitFor(s.t, env, time.Minute, "normalizing 2nd dropped column row", + WaitFuncSchema(s, srcTableName, dstTableName, "id,c1", &protos.TableSchema{ + TableIdentifier: dstTableName, + ColumnNames: []string{"id", "c1", "c2", "c3"}, + ColumnTypes: []string{ + string(qvalue.QValueKindInt64), + string(qvalue.QValueKindInt64), + string(qvalue.QValueKindInt64), + string(qvalue.QValueKindInt64), + }, + PrimaryKeyColumns: []string{"id"}, + }), + ) }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) + wg.Wait() // Verify workflow completes without error require.True(s.t, env.IsWorkflowCompleted()) diff --git a/flow/e2e/test_utils.go b/flow/e2e/test_utils.go index 835e367f69..db94e453a1 100644 --- a/flow/e2e/test_utils.go +++ b/flow/e2e/test_utils.go @@ -517,3 +517,19 @@ func EnvEqualRecordBatches(t *testing.T, env *testsuite.TestWorkflowEnvironment, runtime.Goexit() } } + +func EnvWaitFor(t *testing.T, env *testsuite.TestWorkflowEnvironment, timeout time.Duration, name string, f func(ctx context.Context) bool) { + t.Helper() + + ctx, cleanup := context.WithTimeout(context.Background(), timeout) + defer cleanup() + deadline, _ := ctx.Deadline() + for !f(ctx) { + if time.Now().Compare(deadline) >= 0 { + 
t.Error("WaitFor timed out", name) + env.CancelWorkflow() + runtime.Goexit() + } + time.Sleep(time.Second) + } +} From 03792192e45348147dabe29877df38a8c93f4563 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Thu, 4 Jan 2024 01:50:23 +0000 Subject: [PATCH 02/67] debug waitfor --- flow/e2e/postgres/peer_flow_pg_test.go | 1 + flow/e2e/test_utils.go | 1 + 2 files changed, 2 insertions(+) diff --git a/flow/e2e/postgres/peer_flow_pg_test.go b/flow/e2e/postgres/peer_flow_pg_test.go index 0d547b793c..f3e1890941 100644 --- a/flow/e2e/postgres/peer_flow_pg_test.go +++ b/flow/e2e/postgres/peer_flow_pg_test.go @@ -122,6 +122,7 @@ func WaitFuncSchema( } tableSchema := output.TableNameSchemaMapping[dstTableName] if !reflect.DeepEqual(expectedTableSchema, tableSchema) { + s.t.Log("deep equal false", expectedTableSchema, tableSchema) return false } return s.comparePGTables(srcTableName, dstTableName, cols) == nil diff --git a/flow/e2e/test_utils.go b/flow/e2e/test_utils.go index db94e453a1..90ff2993f9 100644 --- a/flow/e2e/test_utils.go +++ b/flow/e2e/test_utils.go @@ -525,6 +525,7 @@ func EnvWaitFor(t *testing.T, env *testsuite.TestWorkflowEnvironment, timeout ti defer cleanup() deadline, _ := ctx.Deadline() for !f(ctx) { + t.Log(time.Now(), deadline) if time.Now().Compare(deadline) >= 0 { t.Error("WaitFor timed out", name) env.CancelWorkflow() From a8e200c3a7e330c922aaf23bcbc57539170f19d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Thu, 4 Jan 2024 04:25:55 +0000 Subject: [PATCH 03/67] fix test? 
--- flow/e2e/postgres/peer_flow_pg_test.go | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/flow/e2e/postgres/peer_flow_pg_test.go b/flow/e2e/postgres/peer_flow_pg_test.go index f3e1890941..c330db07bf 100644 --- a/flow/e2e/postgres/peer_flow_pg_test.go +++ b/flow/e2e/postgres/peer_flow_pg_test.go @@ -174,9 +174,13 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Schema_Changes_PG() { e2e.EnvWaitFor(s.t, env, time.Minute, "normalizing first row", WaitFuncSchema(s, srcTableName, dstTableName, "id,c1", &protos.TableSchema{ - TableIdentifier: dstTableName, - ColumnNames: []string{"id", "c1"}, - ColumnTypes: []string{string(qvalue.QValueKindInt64), string(qvalue.QValueKindInt64)}, + TableIdentifier: dstTableName, + ColumnNames: []string{"id", "c1", "_PEERDB_SYNCED_AT"}, + ColumnTypes: []string{ + string(qvalue.QValueKindInt64), + string(qvalue.QValueKindInt64), + string(qvalue.QValueKindTimestamp), + }, PrimaryKeyColumns: []string{"id"}, }), ) @@ -194,11 +198,12 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Schema_Changes_PG() { e2e.EnvWaitFor(s.t, env, time.Minute, "normalizing altered row", WaitFuncSchema(s, srcTableName, dstTableName, "id,c1,c2", &protos.TableSchema{ TableIdentifier: dstTableName, - ColumnNames: []string{"id", "c1", "c2"}, + ColumnNames: []string{"id", "c1", "c2", "_PEERDB_SYNCED_AT"}, ColumnTypes: []string{ string(qvalue.QValueKindInt64), string(qvalue.QValueKindInt64), string(qvalue.QValueKindInt64), + string(qvalue.QValueKindTimestamp), }, PrimaryKeyColumns: []string{"id"}, }), @@ -217,10 +222,11 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Schema_Changes_PG() { e2e.EnvWaitFor(s.t, env, time.Minute, "normalizing dropped column row", WaitFuncSchema(s, srcTableName, dstTableName, "id,c1,c3", &protos.TableSchema{ TableIdentifier: dstTableName, - ColumnNames: []string{"id", "c1", "c2", "c3"}, + ColumnNames: []string{"id", "c1", "c2", "_PEERDB_SYNCED_AT", "c3"}, ColumnTypes: []string{ 
string(qvalue.QValueKindInt64), string(qvalue.QValueKindInt64), + string(qvalue.QValueKindTimestamp), string(qvalue.QValueKindInt64), string(qvalue.QValueKindInt64), }, @@ -241,11 +247,12 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Schema_Changes_PG() { e2e.EnvWaitFor(s.t, env, time.Minute, "normalizing 2nd dropped column row", WaitFuncSchema(s, srcTableName, dstTableName, "id,c1", &protos.TableSchema{ TableIdentifier: dstTableName, - ColumnNames: []string{"id", "c1", "c2", "c3"}, + ColumnNames: []string{"id", "c1", "c2", "_PEERDB_SYNCED_AT", "c3"}, ColumnTypes: []string{ string(qvalue.QValueKindInt64), string(qvalue.QValueKindInt64), string(qvalue.QValueKindInt64), + string(qvalue.QValueKindTimestamp), string(qvalue.QValueKindInt64), }, PrimaryKeyColumns: []string{"id"}, From 6677efb1018302c7863bcaaf136048045f662a80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Thu, 4 Jan 2024 04:38:57 +0000 Subject: [PATCH 04/67] experiment: workflow only exits when we cancel it --- flow/e2e/postgres/peer_flow_pg_test.go | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/flow/e2e/postgres/peer_flow_pg_test.go b/flow/e2e/postgres/peer_flow_pg_test.go index c330db07bf..fd7580c32c 100644 --- a/flow/e2e/postgres/peer_flow_pg_test.go +++ b/flow/e2e/postgres/peer_flow_pg_test.go @@ -155,8 +155,8 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Schema_Changes_PG() { require.NoError(s.t, err) limits := peerflow.CDCFlowLimits{ - ExitAfterRecords: 1, - MaxBatchSize: 100, + ExitAfterRecords: -1, + MaxBatchSize: 1, } // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup @@ -258,6 +258,8 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Schema_Changes_PG() { PrimaryKeyColumns: []string{"id"}, }), ) + + env.CancelWorkflow() }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) @@ -302,13 +304,16 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_PG() { require.NoError(s.t, err) 
limits := peerflow.CDCFlowLimits{ - ExitAfterRecords: 10, + ExitAfterRecords: -1, MaxBatchSize: 100, } // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. + wg := sync.WaitGroup{} + wg.Add(1) go func() { + defer wg.Done() e2e.SetupCDCFlowStatusQuery(env, connectionGen) // insert 10 rows into the source table for i := 0; i < 10; i++ { @@ -320,19 +325,20 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_PG() { } s.t.Log("Inserted 10 rows into the source table") - // verify we got our 10 rows - e2e.NormalizeFlowCountQuery(env, connectionGen, 2) - err = s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") - e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitFor(s.t, env, time.Minute, "normalize 10 rows", func(ctx context.Context) bool { + return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil + }) _, err := s.pool.Exec(context.Background(), fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) e2e.EnvNoError(s.t, env, err) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) e2e.EnvNoError(s.t, env, err) + env.CancelWorkflow() }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) + wg.Wait() // Verify workflow completes without error require.True(s.t, env.IsWorkflowCompleted()) From d84f3f6686c6aa911d91b9ff2ad84c789bd96924 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Thu, 4 Jan 2024 05:04:56 +0000 Subject: [PATCH 05/67] fix columns --- flow/e2e/postgres/peer_flow_pg_test.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/flow/e2e/postgres/peer_flow_pg_test.go b/flow/e2e/postgres/peer_flow_pg_test.go index fd7580c32c..1943f78977 100644 --- a/flow/e2e/postgres/peer_flow_pg_test.go +++ b/flow/e2e/postgres/peer_flow_pg_test.go @@ -198,12 +198,12 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Schema_Changes_PG() 
{ e2e.EnvWaitFor(s.t, env, time.Minute, "normalizing altered row", WaitFuncSchema(s, srcTableName, dstTableName, "id,c1,c2", &protos.TableSchema{ TableIdentifier: dstTableName, - ColumnNames: []string{"id", "c1", "c2", "_PEERDB_SYNCED_AT"}, + ColumnNames: []string{"id", "c1", "_PEERDB_SYNCED_AT", "c2"}, ColumnTypes: []string{ - string(qvalue.QValueKindInt64), string(qvalue.QValueKindInt64), string(qvalue.QValueKindInt64), string(qvalue.QValueKindTimestamp), + string(qvalue.QValueKindInt64), }, PrimaryKeyColumns: []string{"id"}, }), @@ -222,7 +222,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Schema_Changes_PG() { e2e.EnvWaitFor(s.t, env, time.Minute, "normalizing dropped column row", WaitFuncSchema(s, srcTableName, dstTableName, "id,c1,c3", &protos.TableSchema{ TableIdentifier: dstTableName, - ColumnNames: []string{"id", "c1", "c2", "_PEERDB_SYNCED_AT", "c3"}, + ColumnNames: []string{"id", "c1", "_PEERDB_SYNCED_AT", "c2", "c3"}, ColumnTypes: []string{ string(qvalue.QValueKindInt64), string(qvalue.QValueKindInt64), @@ -247,13 +247,13 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Schema_Changes_PG() { e2e.EnvWaitFor(s.t, env, time.Minute, "normalizing 2nd dropped column row", WaitFuncSchema(s, srcTableName, dstTableName, "id,c1", &protos.TableSchema{ TableIdentifier: dstTableName, - ColumnNames: []string{"id", "c1", "c2", "_PEERDB_SYNCED_AT", "c3"}, + ColumnNames: []string{"id", "c1", "_PEERDB_SYNCED_AT", "c2", "c3"}, ColumnTypes: []string{ - string(qvalue.QValueKindInt64), string(qvalue.QValueKindInt64), string(qvalue.QValueKindInt64), string(qvalue.QValueKindTimestamp), string(qvalue.QValueKindInt64), + string(qvalue.QValueKindInt64), }, PrimaryKeyColumns: []string{"id"}, }), From 454fcb782c057291af983dc43bc06f14a87caecd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Thu, 4 Jan 2024 05:05:05 +0000 Subject: [PATCH 06/67] handle cancel in cdc_flow --- flow/workflows/cdc_flow.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git 
a/flow/workflows/cdc_flow.go b/flow/workflows/cdc_flow.go index e0ede98131..2b6e78aee3 100644 --- a/flow/workflows/cdc_flow.go +++ b/flow/workflows/cdc_flow.go @@ -331,6 +331,10 @@ func CDCFlowWorkflowWithConfig( // check and act on signals before a fresh flow starts. w.receiveAndHandleSignalAsync(ctx, state) + if err := ctx.Err(); err != nil { + return nil, err + } + if state.ActiveSignal == shared.PauseSignal { startTime := time.Now() state.CurrentFlowState = protos.FlowStatus_STATUS_PAUSED.Enum() @@ -343,6 +347,8 @@ func CDCFlowWorkflowWithConfig( ok, _ := signalChan.ReceiveWithTimeout(ctx, 1*time.Minute, &signalVal) if ok { state.ActiveSignal = shared.FlowSignalHandler(state.ActiveSignal, signalVal, w.logger) + } else if err := ctx.Err(); err != nil { + return nil, err } } From 855e9db7d5c30458e0f06d77b8ce9d1ae0efcb6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Thu, 4 Jan 2024 05:36:15 +0000 Subject: [PATCH 07/67] Don't use reflect.DeepEqual --- flow/e2e/postgres/peer_flow_pg_test.go | 34 +++++++++----------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/flow/e2e/postgres/peer_flow_pg_test.go b/flow/e2e/postgres/peer_flow_pg_test.go index 1943f78977..db23a27175 100644 --- a/flow/e2e/postgres/peer_flow_pg_test.go +++ b/flow/e2e/postgres/peer_flow_pg_test.go @@ -3,7 +3,7 @@ package e2e_postgres import ( "context" "fmt" - "reflect" + "slices" "sync" "time" @@ -111,7 +111,7 @@ func WaitFuncSchema( srcTableName string, dstTableName string, cols string, - expectedTableSchema *protos.TableSchema, + expectedSchema *protos.TableSchema, ) func(context.Context) bool { return func(ctx context.Context) bool { output, err := s.connector.GetTableSchema(&protos.GetTableSchemaBatchInput{ @@ -121,8 +121,12 @@ func WaitFuncSchema( return false } tableSchema := output.TableNameSchemaMapping[dstTableName] - if !reflect.DeepEqual(expectedTableSchema, tableSchema) { - s.t.Log("deep equal false", expectedTableSchema, tableSchema) + if 
expectedSchema.TableIdentifier != tableSchema.TableIdentifier || + expectedSchema.IsReplicaIdentityFull != tableSchema.IsReplicaIdentityFull || + slices.Compare(expectedSchema.PrimaryKeyColumns, tableSchema.PrimaryKeyColumns) != 0 || + slices.Compare(expectedSchema.ColumnNames, tableSchema.ColumnNames) != 0 || + slices.Compare(expectedSchema.ColumnTypes, tableSchema.ColumnTypes) != 0 { + s.t.Log("schemas unequal", expectedSchema, tableSchema) return false } return s.comparePGTables(srcTableName, dstTableName, cols) == nil @@ -264,14 +268,6 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Schema_Changes_PG() { env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) wg.Wait() - - // Verify workflow completes without error - require.True(s.t, env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - // allow only continue as new error - require.Contains(s.t, err.Error(), "continue as new") - env.AssertExpectations(s.t) } @@ -334,22 +330,14 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_PG() { e2e.EnvNoError(s.t, env, err) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitFor(s.t, env, time.Minute, "normalize modifications", func(ctx context.Context) bool { + return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil + }) env.CancelWorkflow() }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) wg.Wait() - - // Verify workflow completes without error - require.True(s.t, env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - // allow only continue as new error - require.Contains(s.t, err.Error(), "continue as new") - - err = s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") - require.NoError(s.t, err) - env.AssertExpectations(s.t) } From c510262f63ca3be912effdfac6312c2fb7d9b18d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Thu, 4 Jan 2024 
06:17:25 +0000 Subject: [PATCH 08/67] idea: need to sleep to avoid test ending before slot closed --- flow/connectors/postgres/postgres.go | 4 +--- flow/e2e/postgres/peer_flow_pg_test.go | 2 ++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/flow/connectors/postgres/postgres.go b/flow/connectors/postgres/postgres.go index 43a11a2e72..7fef9d26c2 100644 --- a/flow/connectors/postgres/postgres.go +++ b/flow/connectors/postgres/postgres.go @@ -196,9 +196,7 @@ func (c *PostgresConnector) SetLastOffset(jobName string, lastOffset int64) erro // PullRecords pulls records from the source. func (c *PostgresConnector) PullRecords(catalogPool *pgxpool.Pool, req *model.PullRecordsRequest) error { - defer func() { - req.RecordStream.Close() - }() + defer req.RecordStream.Close() // Slotname would be the job name prefixed with "peerflow_slot_" slotName := fmt.Sprintf("peerflow_slot_%s", req.FlowJobName) diff --git a/flow/e2e/postgres/peer_flow_pg_test.go b/flow/e2e/postgres/peer_flow_pg_test.go index db23a27175..182d628bf7 100644 --- a/flow/e2e/postgres/peer_flow_pg_test.go +++ b/flow/e2e/postgres/peer_flow_pg_test.go @@ -267,6 +267,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Schema_Changes_PG() { }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) + time.Sleep(10 * time.Second) wg.Wait() env.AssertExpectations(s.t) } @@ -337,6 +338,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_PG() { }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) + time.Sleep(10 * time.Second) wg.Wait() env.AssertExpectations(s.t) } From fa6891cfbaf8f2b0fb2d911c0bf46e3189303146 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Thu, 4 Jan 2024 06:30:18 +0000 Subject: [PATCH 09/67] think activity context needs heartbeat to find out workflow was cancelled --- flow/connectors/postgres/cdc.go | 1 + 1 file changed, 1 insertion(+) diff --git a/flow/connectors/postgres/cdc.go 
b/flow/connectors/postgres/cdc.go index fcb3e64174..fcfc6ccf45 100644 --- a/flow/connectors/postgres/cdc.go +++ b/flow/connectors/postgres/cdc.go @@ -338,6 +338,7 @@ func (p *PostgresCDCSource) consumeStream( var ctx context.Context var cancel context.CancelFunc + activity.RecordHeartbeat(p.ctx, "consuming stream") if cdcRecordsStorage.IsEmpty() { ctx, cancel = context.WithCancel(p.ctx) } else { From 18c3a8dd5f309a12098e7fc38b4a2d3e29dd2278 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Thu, 4 Jan 2024 06:44:00 +0000 Subject: [PATCH 10/67] theory: need to specify WaitForCancellation:true --- flow/workflows/cdc_flow.go | 14 +++++++++----- flow/workflows/sync_flow.go | 3 +++ 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/flow/workflows/cdc_flow.go b/flow/workflows/cdc_flow.go index 2b6e78aee3..35a2f2c613 100644 --- a/flow/workflows/cdc_flow.go +++ b/flow/workflows/cdc_flow.go @@ -216,7 +216,8 @@ func CDCFlowWorkflowWithConfig( RetryPolicy: &temporal.RetryPolicy{ MaximumAttempts: 20, }, - SearchAttributes: mirrorNameSearch, + SearchAttributes: mirrorNameSearch, + WaitForCancellation: true, } setupFlowCtx := workflow.WithChildOptions(ctx, childSetupFlowOpts) setupFlowFuture := workflow.ExecuteChildWorkflow(setupFlowCtx, SetupFlowWorkflow, cfg) @@ -242,8 +243,9 @@ func CDCFlowWorkflowWithConfig( RetryPolicy: &temporal.RetryPolicy{ MaximumAttempts: 20, }, - TaskQueue: taskQueue, - SearchAttributes: mirrorNameSearch, + TaskQueue: taskQueue, + SearchAttributes: mirrorNameSearch, + WaitForCancellation: true, } snapshotFlowCtx := workflow.WithChildOptions(ctx, childSnapshotFlowOpts) snapshotFlowFuture := workflow.ExecuteChildWorkflow(snapshotFlowCtx, SnapshotFlowWorkflow, cfg) @@ -392,7 +394,8 @@ func CDCFlowWorkflowWithConfig( RetryPolicy: &temporal.RetryPolicy{ MaximumAttempts: 20, }, - SearchAttributes: mirrorNameSearch, + SearchAttributes: mirrorNameSearch, + WaitForCancellation: true, } syncCtx := workflow.WithChildOptions(ctx, 
childSyncFlowOpts) syncFlowOptions.RelationMessageMapping = state.RelationMessageMapping @@ -464,7 +467,8 @@ func CDCFlowWorkflowWithConfig( RetryPolicy: &temporal.RetryPolicy{ MaximumAttempts: 20, }, - SearchAttributes: mirrorNameSearch, + SearchAttributes: mirrorNameSearch, + WaitForCancellation: true, } normCtx := workflow.WithChildOptions(ctx, childNormalizeFlowOpts) childNormalizeFlowFuture := workflow.ExecuteChildWorkflow( diff --git a/flow/workflows/sync_flow.go b/flow/workflows/sync_flow.go index 09849b1752..0b82ca4c22 100644 --- a/flow/workflows/sync_flow.go +++ b/flow/workflows/sync_flow.go @@ -41,6 +41,7 @@ func (s *SyncFlowExecution) executeSyncFlow( syncMetaCtx := workflow.WithActivityOptions(ctx, workflow.ActivityOptions{ StartToCloseTimeout: 1 * time.Minute, + WaitForCancellation: true, }) // execute GetLastSyncedID on destination peer @@ -65,6 +66,7 @@ func (s *SyncFlowExecution) executeSyncFlow( startFlowCtx := workflow.WithActivityOptions(ctx, workflow.ActivityOptions{ StartToCloseTimeout: 72 * time.Hour, HeartbeatTimeout: 30 * time.Second, + WaitForCancellation: true, }) // execute StartFlow on the peers to start the flow @@ -83,6 +85,7 @@ func (s *SyncFlowExecution) executeSyncFlow( replayTableSchemaDeltaCtx := workflow.WithActivityOptions(ctx, workflow.ActivityOptions{ StartToCloseTimeout: 30 * time.Minute, + WaitForCancellation: true, }) replayTableSchemaInput := &protos.ReplayTableSchemaDeltaInput{ FlowConnectionConfigs: config, From 17a505bf05651c558e01f54338b6794472a20ea2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Thu, 4 Jan 2024 16:39:20 +0000 Subject: [PATCH 11/67] hoping to use existing heartbeat --- flow/connectors/postgres/cdc.go | 1 - 1 file changed, 1 deletion(-) diff --git a/flow/connectors/postgres/cdc.go b/flow/connectors/postgres/cdc.go index fcfc6ccf45..fcb3e64174 100644 --- a/flow/connectors/postgres/cdc.go +++ b/flow/connectors/postgres/cdc.go @@ -338,7 +338,6 @@ func (p *PostgresCDCSource) 
consumeStream( var ctx context.Context var cancel context.CancelFunc - activity.RecordHeartbeat(p.ctx, "consuming stream") if cdcRecordsStorage.IsEmpty() { ctx, cancel = context.WithCancel(p.ctx) } else { From d01b24e4fb9f65fddfa9f2fc2300d9792e91c818 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Thu, 4 Jan 2024 17:43:51 +0000 Subject: [PATCH 12/67] utils.HeartbeatRoutine: use context.WithCancel --- flow/activities/flowable.go | 39 ++++++------------- flow/connectors/bigquery/qrep_avro_sync.go | 4 +- flow/connectors/eventhub/eventhub.go | 4 +- flow/connectors/postgres/cdc.go | 5 +-- .../postgres/qrep_query_executor.go | 7 +--- flow/connectors/snowflake/qrep_avro_sync.go | 5 +-- flow/connectors/utils/avro/avro_writer.go | 5 +-- flow/connectors/utils/heartbeat.go | 8 ++-- 8 files changed, 21 insertions(+), 56 deletions(-) diff --git a/flow/activities/flowable.go b/flow/activities/flowable.go index 6ee858c028..92c7f80341 100644 --- a/flow/activities/flowable.go +++ b/flow/activities/flowable.go @@ -222,6 +222,12 @@ func (a *FlowableActivity) StartFlow(ctx context.Context, go a.recordSlotSizePeriodically(errCtx, srcConn, slotNameForMetrics, input.FlowConnectionConfigs.Source.Name) + shutdown := utils.HeartbeatRoutine(ctx, 10*time.Second, func() string { + jobName := input.FlowConnectionConfigs.FlowJobName + return fmt.Sprintf("transferring records for job - %s", jobName) + }) + defer shutdown() + // start a goroutine to pull records from the source recordBatch := model.NewCDCRecordStream() startTime := time.Now() @@ -282,15 +288,6 @@ func (a *FlowableActivity) StartFlow(ctx context.Context, return syncResponse, nil } - shutdown := utils.HeartbeatRoutine(ctx, 10*time.Second, func() string { - jobName := input.FlowConnectionConfigs.FlowJobName - return fmt.Sprintf("pushing records for job - %s", jobName) - }) - - defer func() { - shutdown <- struct{}{} - }() - syncStartTime := time.Now() res, err := dstConn.SyncRecords(&model.SyncRecordsRequest{ 
Records: recordBatch, @@ -397,9 +394,7 @@ func (a *FlowableActivity) StartNormalize( shutdown := utils.HeartbeatRoutine(ctx, 2*time.Minute, func() string { return fmt.Sprintf("normalizing records from batch for job - %s", input.FlowConnectionConfigs.FlowJobName) }) - defer func() { - shutdown <- struct{}{} - }() + defer shutdown() slog.InfoContext(ctx, "initializing table schema...") err = dstConn.InitializeTableSchema(input.FlowConnectionConfigs.TableNameSchemaMapping) @@ -494,10 +489,7 @@ func (a *FlowableActivity) GetQRepPartitions(ctx context.Context, shutdown := utils.HeartbeatRoutine(ctx, 2*time.Minute, func() string { return fmt.Sprintf("getting partitions for job - %s", config.FlowJobName) }) - - defer func() { - shutdown <- struct{}{} - }() + defer shutdown() partitions, err := srcConn.GetQRepPartitions(config, last) if err != nil { @@ -635,10 +627,7 @@ func (a *FlowableActivity) replicateQRepPartition(ctx context.Context, shutdown := utils.HeartbeatRoutine(ctx, 5*time.Minute, func() string { return fmt.Sprintf("syncing partition - %s: %d of %d total.", partition.PartitionId, idx, total) }) - - defer func() { - shutdown <- struct{}{} - }() + defer shutdown() rowsSynced, err := dstConn.SyncQRepRecords(config, partition, stream) if err != nil { @@ -684,10 +673,7 @@ func (a *FlowableActivity) ConsolidateQRepPartitions(ctx context.Context, config shutdown := utils.HeartbeatRoutine(ctx, 2*time.Minute, func() string { return fmt.Sprintf("consolidating partitions for job - %s", config.FlowJobName) }) - - defer func() { - shutdown <- struct{}{} - }() + defer shutdown() err = dstConn.ConsolidateQRepPartitions(config) if err != nil { @@ -996,10 +982,7 @@ func (a *FlowableActivity) ReplicateXminPartition(ctx context.Context, shutdown := utils.HeartbeatRoutine(ctx, 5*time.Minute, func() string { return "syncing xmin." 
}) - - defer func() { - shutdown <- struct{}{} - }() + defer shutdown() rowsSynced, err := dstConn.SyncQRepRecords(config, partition, stream) if err != nil { diff --git a/flow/connectors/bigquery/qrep_avro_sync.go b/flow/connectors/bigquery/qrep_avro_sync.go index ceb3b38402..d6df8fdb6e 100644 --- a/flow/connectors/bigquery/qrep_avro_sync.go +++ b/flow/connectors/bigquery/qrep_avro_sync.go @@ -373,9 +373,7 @@ func (s *QRepAvroSyncMethod) writeToStage( objectFolder, stagingTable) }, ) - defer func() { - shutdown <- struct{}{} - }() + defer shutdown() var avroFile *avro.AvroFile ocfWriter := avro.NewPeerDBOCFWriter(s.connector.ctx, stream, avroSchema, diff --git a/flow/connectors/eventhub/eventhub.go b/flow/connectors/eventhub/eventhub.go index 1bb7b00166..24a37eaf91 100644 --- a/flow/connectors/eventhub/eventhub.go +++ b/flow/connectors/eventhub/eventhub.go @@ -216,9 +216,7 @@ func (c *EventHubConnector) SyncRecords(req *model.SyncRecordsRequest) (*model.S numRecords, req.FlowJobName, ) }) - defer func() { - shutdown <- struct{}{} - }() + defer shutdown() numRecords, err = c.processBatch(req.FlowJobName, batch, maxParallelism) if err != nil { diff --git a/flow/connectors/postgres/cdc.go b/flow/connectors/postgres/cdc.go index fcb3e64174..af2b483de1 100644 --- a/flow/connectors/postgres/cdc.go +++ b/flow/connectors/postgres/cdc.go @@ -249,10 +249,7 @@ func (p *PostgresCDCSource) consumeStream( currRecords := cdcRecordsStorage.Len() return fmt.Sprintf("pulling records for job - %s, currently have %d records", jobName, currRecords) }) - - defer func() { - shutdown <- struct{}{} - }() + defer shutdown() standbyMessageTimeout := req.IdleTimeout nextStandbyMessageDeadline := time.Now().Add(standbyMessageTimeout) diff --git a/flow/connectors/postgres/qrep_query_executor.go b/flow/connectors/postgres/qrep_query_executor.go index bb07fbb98f..839845d284 100644 --- a/flow/connectors/postgres/qrep_query_executor.go +++ b/flow/connectors/postgres/qrep_query_executor.go @@ -83,13 
+83,10 @@ func (qe *QRepQueryExecutor) executeQueryInTx(tx pgx.Tx, cursorName string, fetc q := fmt.Sprintf("FETCH %d FROM %s", fetchSize, cursorName) if !qe.testEnv { - shutdownCh := utils.HeartbeatRoutine(qe.ctx, 1*time.Minute, func() string { + shutdown := utils.HeartbeatRoutine(qe.ctx, 1*time.Minute, func() string { return fmt.Sprintf("running '%s'", q) }) - - defer func() { - shutdownCh <- struct{}{} - }() + defer shutdown() } rows, err := tx.Query(qe.ctx, q) diff --git a/flow/connectors/snowflake/qrep_avro_sync.go b/flow/connectors/snowflake/qrep_avro_sync.go index 07eb791c5c..83521088d8 100644 --- a/flow/connectors/snowflake/qrep_avro_sync.go +++ b/flow/connectors/snowflake/qrep_avro_sync.go @@ -282,10 +282,7 @@ func (s *SnowflakeAvroSyncMethod) putFileToStage(avroFile *avro.AvroFile, stage shutdown := utils.HeartbeatRoutine(s.connector.ctx, 10*time.Second, func() string { return fmt.Sprintf("putting file to stage %s", stage) }) - - defer func() { - shutdown <- struct{}{} - }() + defer shutdown() if _, err := s.connector.database.ExecContext(s.connector.ctx, putCmd); err != nil { return fmt.Errorf("failed to put file to stage: %w", err) diff --git a/flow/connectors/utils/avro/avro_writer.go b/flow/connectors/utils/avro/avro_writer.go index 90c016b404..1e6f318713 100644 --- a/flow/connectors/utils/avro/avro_writer.go +++ b/flow/connectors/utils/avro/avro_writer.go @@ -136,10 +136,7 @@ func (p *peerDBOCFWriter) writeRecordsToOCFWriter(ocfWriter *goavro.OCFWriter) ( written := numRows.Load() return fmt.Sprintf("[avro] written %d rows to OCF", written) }) - - defer func() { - shutdown <- struct{}{} - }() + defer shutdown() } for qRecordOrErr := range p.stream.Records { diff --git a/flow/connectors/utils/heartbeat.go b/flow/connectors/utils/heartbeat.go index c1bc81f077..37f00bc72f 100644 --- a/flow/connectors/utils/heartbeat.go +++ b/flow/connectors/utils/heartbeat.go @@ -13,8 +13,8 @@ func HeartbeatRoutine( ctx context.Context, interval time.Duration, message 
func() string, -) chan<- struct{} { - shutdown := make(chan struct{}) +) context.CancelFunc { + ctx, cancel := context.WithCancel(ctx) go func() { counter := 0 for { @@ -22,15 +22,13 @@ func HeartbeatRoutine( msg := fmt.Sprintf("heartbeat #%d: %s", counter, message()) RecordHeartbeatWithRecover(ctx, msg) select { - case <-shutdown: - return case <-ctx.Done(): return case <-time.After(interval): } } }() - return shutdown + return cancel } // if the functions are being called outside the context of a Temporal workflow, From 8c52d8b9b5e4d19f6d0194c72ec6358c1c897f1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Thu, 4 Jan 2024 19:37:50 +0000 Subject: [PATCH 13/67] Try waiting a long time for cancellation --- flow/connectors/postgres/cdc.go | 7 ++++++- flow/e2e/postgres/peer_flow_pg_test.go | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/flow/connectors/postgres/cdc.go b/flow/connectors/postgres/cdc.go index af2b483de1..dd62595c3e 100644 --- a/flow/connectors/postgres/cdc.go +++ b/flow/connectors/postgres/cdc.go @@ -340,9 +340,14 @@ func (p *PostgresCDCSource) consumeStream( } else { ctx, cancel = context.WithDeadline(p.ctx, nextStandbyMessageDeadline) } - rawMsg, err := conn.ReceiveMessage(ctx) cancel() + + ctxErr := p.ctx.Err() + if ctxErr != nil { + return fmt.Errorf("consumeStream preempted: %w", ctxErr) + } + if err != nil && !p.commitLock { if pgconn.Timeout(err) { p.logger.Info(fmt.Sprintf("Stand-by deadline reached, returning currently accumulated records - %d", diff --git a/flow/e2e/postgres/peer_flow_pg_test.go b/flow/e2e/postgres/peer_flow_pg_test.go index 182d628bf7..0b09aed543 100644 --- a/flow/e2e/postgres/peer_flow_pg_test.go +++ b/flow/e2e/postgres/peer_flow_pg_test.go @@ -267,7 +267,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Schema_Changes_PG() { }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - time.Sleep(10 * time.Second) + time.Sleep(2 * time.Minute) wg.Wait() 
env.AssertExpectations(s.t) } From 950e9f417eae01874716fedf5ff9e765ab124b33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Thu, 4 Jan 2024 22:43:52 +0000 Subject: [PATCH 14/67] Wait for activity to cancel in a waitfor loop in teardown --- flow/connectors/postgres/cdc.go | 23 +++++++++++++---------- flow/e2e/congen.go | 11 ++++++++++- flow/e2e/postgres/peer_flow_pg_test.go | 1 - flow/e2e/test_utils.go | 4 ++-- 4 files changed, 25 insertions(+), 14 deletions(-) diff --git a/flow/connectors/postgres/cdc.go b/flow/connectors/postgres/cdc.go index dd62595c3e..fc6c665bda 100644 --- a/flow/connectors/postgres/cdc.go +++ b/flow/connectors/postgres/cdc.go @@ -297,17 +297,19 @@ func (p *PostgresCDCSource) consumeStream( } } - if (cdcRecordsStorage.Len() >= int(req.MaxBatchSize)) && !p.commitLock { - return nil - } + if !p.commitLock { + if cdcRecordsStorage.Len() >= int(req.MaxBatchSize) { + return nil + } - if waitingForCommit && !p.commitLock { - p.logger.Info(fmt.Sprintf( - "[%s] commit received, returning currently accumulated records - %d", - p.flowJobName, - cdcRecordsStorage.Len()), - ) - return nil + if waitingForCommit { + p.logger.Info(fmt.Sprintf( + "[%s] commit received, returning currently accumulated records - %d", + p.flowJobName, + cdcRecordsStorage.Len()), + ) + return nil + } } // if we are past the next standby deadline (?) 
@@ -343,6 +345,7 @@ func (p *PostgresCDCSource) consumeStream( rawMsg, err := conn.ReceiveMessage(ctx) cancel() + utils.RecordHeartbeatWithRecover(p.ctx, "consumeStream ReceiveMessage") ctxErr := p.ctx.Err() if ctxErr != nil { return fmt.Errorf("consumeStream preempted: %w", ctxErr) diff --git a/flow/e2e/congen.go b/flow/e2e/congen.go index df1ff17c13..c6300301cd 100644 --- a/flow/e2e/congen.go +++ b/flow/e2e/congen.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "log/slog" + "time" "github.com/PeerDB-io/peer-flow/connectors/utils" "github.com/PeerDB-io/peer-flow/generated/protos" @@ -137,7 +138,15 @@ func SetupPostgres(suffix string) (*pgxpool.Pool, error) { func TearDownPostgres(pool *pgxpool.Pool, suffix string) error { // drop the e2e_test schema if pool != nil { - err := cleanPostgres(pool, suffix) + deadline := time.Now().Add(time.Minute) + var err error + for { + err = cleanPostgres(pool, suffix) + if time.Now().Compare(deadline) > 0 { + break + } + time.Sleep(time.Second) + } if err != nil { return err } diff --git a/flow/e2e/postgres/peer_flow_pg_test.go b/flow/e2e/postgres/peer_flow_pg_test.go index 0b09aed543..e9c77f544b 100644 --- a/flow/e2e/postgres/peer_flow_pg_test.go +++ b/flow/e2e/postgres/peer_flow_pg_test.go @@ -267,7 +267,6 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Schema_Changes_PG() { }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - time.Sleep(2 * time.Minute) wg.Wait() env.AssertExpectations(s.t) } diff --git a/flow/e2e/test_utils.go b/flow/e2e/test_utils.go index 90ff2993f9..bcff6e9d3e 100644 --- a/flow/e2e/test_utils.go +++ b/flow/e2e/test_utils.go @@ -14,7 +14,7 @@ import ( "github.com/PeerDB-io/peer-flow/activities" connpostgres "github.com/PeerDB-io/peer-flow/connectors/postgres" connsnowflake "github.com/PeerDB-io/peer-flow/connectors/snowflake" - utils "github.com/PeerDB-io/peer-flow/connectors/utils/catalog" + "github.com/PeerDB-io/peer-flow/connectors/utils" 
"github.com/PeerDB-io/peer-flow/e2eshared" "github.com/PeerDB-io/peer-flow/generated/protos" "github.com/PeerDB-io/peer-flow/logger" @@ -32,7 +32,7 @@ import ( func RegisterWorkflowsAndActivities(t *testing.T, env *testsuite.TestWorkflowEnvironment) { t.Helper() - conn, err := utils.GetCatalogConnectionPoolFromEnv() + conn, err := pgxpool.New(context.Background(), utils.GetPGConnectionString(GetTestPostgresConf())) if err != nil { t.Fatalf("unable to create catalog connection pool: %v", err) } From 6b635f37816528da9df688ac2516484df988ccbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Fri, 5 Jan 2024 02:53:28 +0000 Subject: [PATCH 15/67] Convert rest of e2e/postgres to WaitFor --- flow/e2e/postgres/peer_flow_pg_test.go | 122 ++++++++++++++----------- flow/e2e/postgres/qrep_flow_pg_test.go | 2 +- 2 files changed, 68 insertions(+), 56 deletions(-) diff --git a/flow/e2e/postgres/peer_flow_pg_test.go b/flow/e2e/postgres/peer_flow_pg_test.go index e9c77f544b..cd0b37b2c2 100644 --- a/flow/e2e/postgres/peer_flow_pg_test.go +++ b/flow/e2e/postgres/peer_flow_pg_test.go @@ -102,8 +102,6 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Flow_PG() { err = s.comparePGTables(srcTableName, dstTableName, "id,key,value") require.NoError(s.t, err) - - env.AssertExpectations(s.t) } func WaitFuncSchema( @@ -268,7 +266,6 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Schema_Changes_PG() { env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) wg.Wait() - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_PG() { @@ -337,9 +334,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_PG() { }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - time.Sleep(10 * time.Second) wg.Wait() - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_Toast_1_PG() { @@ -419,8 +414,6 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_Toast_1_PG() { // 
verify our updates and delete happened err = s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t,t2") require.NoError(s.t, err) - - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_Toast_2_PG() { @@ -457,13 +450,16 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_Toast_2_PG() { require.NoError(s.t, err) limits := peerflow.CDCFlowLimits{ - ExitAfterRecords: 10, + ExitAfterRecords: -1, MaxBatchSize: 100, } // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. + wg := sync.WaitGroup{} + wg.Add(1) go func() { + defer wg.Done() e2e.SetupCDCFlowStatusQuery(env, connectionGen) // insert 10 rows into the source table @@ -476,28 +472,24 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_Toast_2_PG() { } s.t.Log("Inserted 10 rows into the source table") - e2e.NormalizeFlowCountQuery(env, connectionGen, 2) + e2e.EnvWaitFor(s.t, env, time.Minute, "normalize 10 rows", func(ctx context.Context) bool { + return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t,t2") == nil + }) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) e2e.EnvNoError(s.t, env, err) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) e2e.EnvNoError(s.t, env, err) - }() - env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - - // Verify workflow completes without error - require.True(s.t, env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - // allow only continue as new error - require.Contains(s.t, err.Error(), "continue as new") + e2e.EnvWaitFor(s.t, env, time.Minute, "normalize update", func(ctx context.Context) bool { + return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t,t2") == nil + }) - // verify our updates and delete happened - err = s.comparePGTables(srcTableName, dstTableName, 
"id,c1,c2,t,t2") - require.NoError(s.t, err) + env.CancelWorkflow() + }() - env.AssertExpectations(s.t) + env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) + wg.Wait() } func (s PeerFlowE2ETestSuitePG) Test_PeerDB_Columns() { @@ -560,7 +552,6 @@ func (s PeerFlowE2ETestSuitePG) Test_PeerDB_Columns() { require.Contains(s.t, err.Error(), "continue as new") checkErr := s.checkPeerdbColumns(dstTableName, 1) require.NoError(s.t, checkErr) - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_Basic() { @@ -602,7 +593,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_Basic() { } limits := peerflow.CDCFlowLimits{ - ExitAfterRecords: 3, + ExitAfterRecords: -1, MaxBatchSize: 100, } @@ -618,11 +609,15 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_Basic() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.NormalizeFlowCountQuery(env, connectionGen, 1) + e2e.EnvWaitFor(s.t, env, time.Minute, "normalize row", func(ctx context.Context) bool { + return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil + }) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` UPDATE %s SET c1=c1+4 WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.NormalizeFlowCountQuery(env, connectionGen, 2) + e2e.EnvWaitFor(s.t, env, time.Minute, "normalize update", func(ctx context.Context) bool { + return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil + }) // since we delete stuff, create another table to compare with _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` CREATE TABLE %s AS SELECT * FROM %s`, cmpTableName, srcTableName)) @@ -630,23 +625,26 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_Basic() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) + + e2e.EnvWaitFor(s.t, 
env, time.Minute, "normalize delete", func(ctx context.Context) bool { + return s.comparePGTables(srcTableName, dstTableName+` WHERE NOT "_PEERDB_IS_DELETED"`, "id,c1,c2,t") == nil + }) + + env.CancelWorkflow() }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, config, &limits, nil) - require.True(s.t, env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - require.Contains(s.t, err.Error(), "continue as new") - wg.Wait() // verify our updates and delete happened err = s.comparePGTables(cmpTableName, dstTableName, "id,c1,c2,t") require.NoError(s.t, err) - softDeleteQuery := fmt.Sprintf(` - SELECT COUNT(*) FROM %s WHERE "_PEERDB_IS_DELETED"=TRUE`, - dstTableName) - numRows, err := s.countRowsInQuery(softDeleteQuery) + softDeleteQuery := fmt.Sprintf( + `SELECT COUNT(*) FROM %s WHERE "_PEERDB_IS_DELETED"`, + dstTableName, + ) + numRows, err := s.RunInt64Query(softDeleteQuery) require.NoError(s.t, err) require.Equal(s.t, int64(1), numRows) } @@ -729,9 +727,9 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_IUD_Same_Batch() { require.NoError(s.t, err) softDeleteQuery := fmt.Sprintf(` - SELECT COUNT(*) FROM %s WHERE "_PEERDB_IS_DELETED"=TRUE`, + SELECT COUNT(*) FROM %s WHERE "_PEERDB_IS_DELETED"`, dstTableName) - numRows, err := s.countRowsInQuery(softDeleteQuery) + numRows, err := s.RunInt64Query(softDeleteQuery) require.NoError(s.t, err) require.Equal(s.t, int64(1), numRows) } @@ -781,13 +779,18 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_UD_Same_Batch() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. 
+ wg := sync.WaitGroup{} + wg.Add(1) go func() { + defer wg.Done() e2e.SetupCDCFlowStatusQuery(env, connectionGen) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.NormalizeFlowCountQuery(env, connectionGen, 1) + e2e.EnvWaitFor(s.t, env, time.Minute, "normalize row", func(ctx context.Context) bool { + return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil + }) insertTx, err := s.pool.Begin(context.Background()) e2e.EnvNoError(s.t, env, err) @@ -806,21 +809,24 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_UD_Same_Batch() { e2e.EnvNoError(s.t, env, err) e2e.EnvNoError(s.t, env, insertTx.Commit(context.Background())) + + e2e.EnvWaitFor(s.t, env, time.Minute, "normalize transaction", func(ctx context.Context) bool { + return s.comparePGTables(srcTableName, dstTableName+` WHERE NOT "_PEERDB_IS_DELETED"`, "id,c1,c2,t") == nil + }) + + env.CancelWorkflow() }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, config, &limits, nil) - require.True(s.t, env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - require.Contains(s.t, err.Error(), "continue as new") + wg.Wait() // verify our updates and delete happened - err = s.comparePGTables(cmpTableName, dstTableName, "id,c1,c2,t") require.NoError(s.t, err) softDeleteQuery := fmt.Sprintf(` - SELECT COUNT(*) FROM %s WHERE "_PEERDB_IS_DELETED"=TRUE`, + SELECT COUNT(*) FROM %s WHERE "_PEERDB_IS_DELETED"`, dstTableName) - numRows, err := s.countRowsInQuery(softDeleteQuery) + numRows, err := s.RunInt64Query(softDeleteQuery) require.NoError(s.t, err) require.Equal(s.t, int64(1), numRows) } @@ -863,41 +869,47 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_Insert_After_Delete() { } limits := peerflow.CDCFlowLimits{ - ExitAfterRecords: 3, + ExitAfterRecords: -1, MaxBatchSize: 100, } // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert and 
delete rows in the table. + wg := sync.WaitGroup{} + wg.Add(1) go func() { + defer wg.Done() e2e.SetupCDCFlowStatusQuery(env, connectionGen) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.NormalizeFlowCountQuery(env, connectionGen, 1) + e2e.EnvWaitFor(s.t, env, time.Minute, "normalize row", func(ctx context.Context) bool { + return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil + }) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.NormalizeFlowCountQuery(env, connectionGen, 2) + e2e.EnvWaitFor(s.t, env, time.Minute, "normalize delete", func(ctx context.Context) bool { + return s.comparePGTables(srcTableName, dstTableName+` WHERE NOT "_PEERDB_IS_DELETED"`, "id,c1,c2,t") == nil + }) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(id,c1,c2,t) VALUES (1,3,4,random_string(10000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitFor(s.t, env, time.Minute, "normalize reinsert", func(ctx context.Context) bool { + return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil + }) + + env.CancelWorkflow() }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, config, &limits, nil) - require.True(s.t, env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - require.Contains(s.t, err.Error(), "continue as new") - - // verify our updates and delete happened - err = s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") - require.NoError(s.t, err) + wg.Wait() softDeleteQuery := fmt.Sprintf(` - SELECT COUNT(*) FROM %s WHERE "_PEERDB_IS_DELETED"=TRUE`, + SELECT COUNT(*) FROM %s WHERE "_PEERDB_IS_DELETED"`, dstTableName) - numRows, err := s.countRowsInQuery(softDeleteQuery) + numRows, err := s.RunInt64Query(softDeleteQuery) require.NoError(s.t, err) require.Equal(s.t, int64(0), numRows) } diff --git 
a/flow/e2e/postgres/qrep_flow_pg_test.go b/flow/e2e/postgres/qrep_flow_pg_test.go index cf0eda3b03..8c32b69696 100644 --- a/flow/e2e/postgres/qrep_flow_pg_test.go +++ b/flow/e2e/postgres/qrep_flow_pg_test.go @@ -165,7 +165,7 @@ func (s PeerFlowE2ETestSuitePG) checkSyncedAt(dstSchemaQualified string) error { return rows.Err() } -func (s PeerFlowE2ETestSuitePG) countRowsInQuery(query string) (int64, error) { +func (s PeerFlowE2ETestSuitePG) RunInt64Query(query string) (int64, error) { var count pgtype.Int8 err := s.pool.QueryRow(context.Background(), query).Scan(&count) return count.Int64, err From 46f9d280f308112f5d2c075b08fd63ac5e8bc8f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Fri, 5 Jan 2024 04:33:59 +0000 Subject: [PATCH 16/67] First stab at snowflake --- flow/e2e/snowflake/peer_flow_sf_test.go | 27 +++++++++++-------------- flow/e2e/test_utils.go | 25 +++++++++++++++++++++++ 2 files changed, 37 insertions(+), 15 deletions(-) diff --git a/flow/e2e/snowflake/peer_flow_sf_test.go b/flow/e2e/snowflake/peer_flow_sf_test.go index 8314032bea..01cd5a71ec 100644 --- a/flow/e2e/snowflake/peer_flow_sf_test.go +++ b/flow/e2e/snowflake/peer_flow_sf_test.go @@ -858,16 +858,19 @@ func (s PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert and mutate schema repeatedly. + wg := sync.WaitGroup{} + wg.Add(1) go func() { - // insert first row. + defer wg.Done() + e2e.SetupCDCFlowStatusQuery(env, connectionGen) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1) VALUES ($1)`, srcTableName), 1) e2e.EnvNoError(s.t, env, err) s.t.Log("Inserted initial row in the source table") - // verify we got our first row. 
- e2e.NormalizeFlowCountQuery(env, connectionGen, 2) + e2e.EnvWaitForEqualTables(env, s, "normalize reinsert", "test_simple_schema_changes", "id,c1") + expectedTableSchema := &protos.TableSchema{ TableIdentifier: strings.ToUpper(dstTableName), ColumnNames: []string{ @@ -901,7 +904,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF() { s.t.Log("Inserted row with added c2 in the source table") // verify we got our two rows, if schema did not match up it will error. - e2e.NormalizeFlowCountQuery(env, connectionGen, 4) + e2e.EnvWaitForEqualTables(env, s, "normalize altered row", "test_simple_schema_changes", "id,c1,c2") expectedTableSchema = &protos.TableSchema{ TableIdentifier: strings.ToUpper(dstTableName), ColumnNames: []string{ @@ -937,7 +940,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF() { s.t.Log("Inserted row with added c3 in the source table") // verify we got our two rows, if schema did not match up it will error. - e2e.NormalizeFlowCountQuery(env, connectionGen, 6) + e2e.EnvWaitForEqualTables(env, s, "normalize dropped c2 column", "test_simple_schema_changes", "id,c1,c3") expectedTableSchema = &protos.TableSchema{ TableIdentifier: strings.ToUpper(dstTableName), ColumnNames: []string{ @@ -975,7 +978,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF() { s.t.Log("Inserted row after dropping all columns in the source table") // verify we got our two rows, if schema did not match up it will error. 
- e2e.NormalizeFlowCountQuery(env, connectionGen, 8) + e2e.EnvWaitForEqualTables(env, s, "normalize dropped c3 column", "test_simple_schema_changes", "id,c1") expectedTableSchema = &protos.TableSchema{ TableIdentifier: strings.ToUpper(dstTableName), ColumnNames: []string{ @@ -1001,18 +1004,12 @@ func (s PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF() { e2e.EnvNoError(s.t, env, err) e2e.EnvEqual(s.t, env, expectedTableSchema, output.TableNameSchemaMapping[dstTableName]) e2e.EnvEqualTables(env, s, "test_simple_schema_changes", "id,c1") + + env.CancelWorkflow() }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - - // Verify workflow completes without error - require.True(s.t, env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - // allow only continue as new error - require.Contains(s.t, err.Error(), "continue as new") - - env.AssertExpectations(s.t) + wg.Wait() } func (s PeerFlowE2ETestSuiteSF) Test_Composite_PKey_SF() { diff --git a/flow/e2e/test_utils.go b/flow/e2e/test_utils.go index bcff6e9d3e..d7d76fce8d 100644 --- a/flow/e2e/test_utils.go +++ b/flow/e2e/test_utils.go @@ -125,6 +125,31 @@ func EnvEqualTables(env *testsuite.TestWorkflowEnvironment, suite e2eshared.RowS EnvEqualRecordBatches(t, env, pgRows, rows) } +func EnvWaitForEqualTables( + env *testsuite.TestWorkflowEnvironment, + suite e2eshared.RowSource, + reason string, + table string, + cols string, +) { + t := suite.T() + EnvWaitFor(t, env, time.Minute, reason, func(ctx context.Context) bool { + suffix := suite.Suffix() + pool := suite.Pool() + pgRows, err := GetPgRows(pool, suffix, table, cols) + if err != nil { + return false + } + + rows, err := suite.GetRows(table, cols) + if err != nil { + return false + } + + return e2eshared.CheckEqualRecordBatches(t, pgRows, rows) + }) +} + func SetupCDCFlowStatusQuery(env *testsuite.TestWorkflowEnvironment, connectionGen FlowConnectionGenerationConfig, ) { From 3dd64e2b3af30a4cfea14c793c8834e8783b2b97 Mon 
Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Fri, 5 Jan 2024 16:49:46 +0000 Subject: [PATCH 17/67] log cancel --- flow/e2e/snowflake/peer_flow_sf_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/flow/e2e/snowflake/peer_flow_sf_test.go b/flow/e2e/snowflake/peer_flow_sf_test.go index 01cd5a71ec..80ec22bf94 100644 --- a/flow/e2e/snowflake/peer_flow_sf_test.go +++ b/flow/e2e/snowflake/peer_flow_sf_test.go @@ -1009,6 +1009,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF() { }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) + s.t.Log("--- workflow done ---") wg.Wait() } From 68cdc874d0119a6a87358273381114410c54e04e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Fri, 5 Jan 2024 17:10:56 +0000 Subject: [PATCH 18/67] -1 --- flow/e2e/snowflake/peer_flow_sf_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flow/e2e/snowflake/peer_flow_sf_test.go b/flow/e2e/snowflake/peer_flow_sf_test.go index 80ec22bf94..8aeb91db37 100644 --- a/flow/e2e/snowflake/peer_flow_sf_test.go +++ b/flow/e2e/snowflake/peer_flow_sf_test.go @@ -852,7 +852,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF() { require.NoError(s.t, err) limits := peerflow.CDCFlowLimits{ - ExitAfterRecords: 1, + ExitAfterRecords: -1, MaxBatchSize: 100, } From 6a9223914ba104b24218ea16bdf066ad56c8488a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Fri, 5 Jan 2024 18:09:12 +0000 Subject: [PATCH 19/67] compare schema helper, more logging --- flow/e2e/postgres/peer_flow_pg_test.go | 7 +------ flow/e2e/snowflake/peer_flow_sf_test.go | 5 +++-- flow/e2e/test_utils.go | 23 +++++++++++++++++++++-- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/flow/e2e/postgres/peer_flow_pg_test.go b/flow/e2e/postgres/peer_flow_pg_test.go index cd0b37b2c2..e3a80f50ce 100644 --- a/flow/e2e/postgres/peer_flow_pg_test.go +++ b/flow/e2e/postgres/peer_flow_pg_test.go @@ 
-3,7 +3,6 @@ package e2e_postgres import ( "context" "fmt" - "slices" "sync" "time" @@ -119,11 +118,7 @@ func WaitFuncSchema( return false } tableSchema := output.TableNameSchemaMapping[dstTableName] - if expectedSchema.TableIdentifier != tableSchema.TableIdentifier || - expectedSchema.IsReplicaIdentityFull != tableSchema.IsReplicaIdentityFull || - slices.Compare(expectedSchema.PrimaryKeyColumns, tableSchema.PrimaryKeyColumns) != 0 || - slices.Compare(expectedSchema.ColumnNames, tableSchema.ColumnNames) != 0 || - slices.Compare(expectedSchema.ColumnTypes, tableSchema.ColumnTypes) != 0 { + if !e2e.CompareTableSchemas(expectedSchema, tableSchema) { s.t.Log("schemas unequal", expectedSchema, tableSchema) return false } diff --git a/flow/e2e/snowflake/peer_flow_sf_test.go b/flow/e2e/snowflake/peer_flow_sf_test.go index 8aeb91db37..121d50de2a 100644 --- a/flow/e2e/snowflake/peer_flow_sf_test.go +++ b/flow/e2e/snowflake/peer_flow_sf_test.go @@ -870,6 +870,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF() { s.t.Log("Inserted initial row in the source table") e2e.EnvWaitForEqualTables(env, s, "normalize reinsert", "test_simple_schema_changes", "id,c1") + s.t.Log("Tables equal") expectedTableSchema := &protos.TableSchema{ TableIdentifier: strings.ToUpper(dstTableName), @@ -890,8 +891,8 @@ func (s PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF() { TableIdentifiers: []string{dstTableName}, }) e2e.EnvNoError(s.t, env, err) - e2e.EnvEqual(s.t, env, expectedTableSchema, output.TableNameSchemaMapping[dstTableName]) - e2e.EnvEqualTables(env, s, "test_simple_schema_changes", "id,c1") + e2e.EnvTrue(s.t, env, e2e.CompareTableSchemas(expectedTableSchema, output.TableNameSchemaMapping[dstTableName])) + s.t.Log("Schemas equal") // alter source table, add column c2 and insert another row. 
_, err = s.pool.Exec(context.Background(), fmt.Sprintf(` diff --git a/flow/e2e/test_utils.go b/flow/e2e/test_utils.go index d7d76fce8d..43e39d4bac 100644 --- a/flow/e2e/test_utils.go +++ b/flow/e2e/test_utils.go @@ -7,6 +7,7 @@ import ( "log/slog" "os" "runtime" + "slices" "strings" "testing" "time" @@ -86,6 +87,16 @@ func EnvEqual[T comparable](t *testing.T, env *testsuite.TestWorkflowEnvironment } } +func EnvTrue(t *testing.T, env *testsuite.TestWorkflowEnvironment, val bool) { + t.Helper() + + if !val { + t.Error("assertion failed") + env.CancelWorkflow() + runtime.Goexit() + } +} + func GetPgRows(pool *pgxpool.Pool, suffix string, table string, cols string) (*model.QRecordBatch, error) { pgQueryExecutor := connpostgres.NewQRepQueryExecutor(pool, context.Background(), "testflow", "testpart") pgQueryExecutor.SetTestEnv(true) @@ -528,6 +539,14 @@ func (l *TStructuredLogger) Error(msg string, keyvals ...interface{}) { l.logger.With(l.keyvalsToFields(keyvals)).Error(msg) } +func CompareTableSchemas(x *protos.TableSchema, y *protos.TableSchema) bool { + return x.TableIdentifier == y.TableIdentifier || + x.IsReplicaIdentityFull == y.IsReplicaIdentityFull || + slices.Compare(x.PrimaryKeyColumns, y.PrimaryKeyColumns) == 0 || + slices.Compare(x.ColumnNames, y.ColumnNames) == 0 || + slices.Compare(x.ColumnTypes, y.ColumnTypes) == 0 +} + func RequireEqualRecordBatches(t *testing.T, q *model.QRecordBatch, other *model.QRecordBatch) { t.Helper() require.True(t, e2eshared.CheckEqualRecordBatches(t, q, other)) @@ -543,7 +562,7 @@ func EnvEqualRecordBatches(t *testing.T, env *testsuite.TestWorkflowEnvironment, } } -func EnvWaitFor(t *testing.T, env *testsuite.TestWorkflowEnvironment, timeout time.Duration, name string, f func(ctx context.Context) bool) { +func EnvWaitFor(t *testing.T, env *testsuite.TestWorkflowEnvironment, timeout time.Duration, reason string, f func(ctx context.Context) bool) { t.Helper() ctx, cleanup := context.WithTimeout(context.Background(), timeout) @@ 
-552,7 +571,7 @@ func EnvWaitFor(t *testing.T, env *testsuite.TestWorkflowEnvironment, timeout ti for !f(ctx) { t.Log(time.Now(), deadline) if time.Now().Compare(deadline) >= 0 { - t.Error("WaitFor timed out", name) + t.Error("WaitFor timed out", reason) env.CancelWorkflow() runtime.Goexit() } From b95a1322399b30328560268b94f1a5c7b7691e73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Fri, 5 Jan 2024 18:46:18 +0000 Subject: [PATCH 20/67] fix up expected schema --- flow/e2e/snowflake/peer_flow_sf_test.go | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/flow/e2e/snowflake/peer_flow_sf_test.go b/flow/e2e/snowflake/peer_flow_sf_test.go index 121d50de2a..14282d7aa9 100644 --- a/flow/e2e/snowflake/peer_flow_sf_test.go +++ b/flow/e2e/snowflake/peer_flow_sf_test.go @@ -911,16 +911,14 @@ func (s PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF() { ColumnNames: []string{ "ID", "C1", - "C2", - "_PEERDB_IS_DELETED", "_PEERDB_SYNCED_AT", + "C2", }, ColumnTypes: []string{ string(qvalue.QValueKindNumeric), string(qvalue.QValueKindNumeric), - string(qvalue.QValueKindNumeric), - string(qvalue.QValueKindBoolean), string(qvalue.QValueKindTimestamp), + string(qvalue.QValueKindNumeric), }, } output, err = s.connector.GetTableSchema(&protos.GetTableSchemaBatchInput{ @@ -947,18 +945,16 @@ func (s PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF() { ColumnNames: []string{ "ID", "C1", + "_PEERDB_SYNCED_AT", "C2", "C3", - "_PEERDB_IS_DELETED", - "_PEERDB_SYNCED_AT", }, ColumnTypes: []string{ string(qvalue.QValueKindNumeric), string(qvalue.QValueKindNumeric), + string(qvalue.QValueKindTimestamp), string(qvalue.QValueKindNumeric), string(qvalue.QValueKindNumeric), - string(qvalue.QValueKindBoolean), - string(qvalue.QValueKindTimestamp), }, } output, err = s.connector.GetTableSchema(&protos.GetTableSchemaBatchInput{ @@ -985,18 +981,16 @@ func (s PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF() { ColumnNames: []string{ 
"ID", "C1", + "_PEERDB_SYNCED_AT", "C2", "C3", - "_PEERDB_IS_DELETED", - "_PEERDB_SYNCED_AT", }, ColumnTypes: []string{ string(qvalue.QValueKindNumeric), string(qvalue.QValueKindNumeric), + string(qvalue.QValueKindTimestamp), string(qvalue.QValueKindNumeric), string(qvalue.QValueKindNumeric), - string(qvalue.QValueKindBoolean), - string(qvalue.QValueKindTimestamp), }, } output, err = s.connector.GetTableSchema(&protos.GetTableSchemaBatchInput{ From 6f591db77f132484f1c3795709ff8a52b51a9cde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Fri, 5 Jan 2024 19:23:27 +0000 Subject: [PATCH 21/67] fix comparisons --- flow/connectors/snowflake/snowflake.go | 6 ++---- flow/e2e/snowflake/peer_flow_sf_test.go | 8 +++----- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/flow/connectors/snowflake/snowflake.go b/flow/connectors/snowflake/snowflake.go index bb1eb4240c..3ed7f1cea8 100644 --- a/flow/connectors/snowflake/snowflake.go +++ b/flow/connectors/snowflake/snowflake.go @@ -248,8 +248,7 @@ func (c *SnowflakeConnector) getTableSchemaForTable(tableName string) (*protos.T return nil, fmt.Errorf("error querying Snowflake peer for schema of table %s: %w", tableName, err) } defer func() { - // not sure if the errors these two return are same or different? - err = errors.Join(rows.Close(), rows.Err()) + err = rows.Close() if err != nil { c.logger.Error("error while closing rows for reading schema of table", slog.String("tableName", tableName), @@ -289,8 +288,7 @@ func (c *SnowflakeConnector) GetLastOffset(jobName string) (int64, error) { return 0, fmt.Errorf("error querying Snowflake peer for last syncedID: %w", err) } defer func() { - // not sure if the errors these two return are same or different? 
- err = errors.Join(rows.Close(), rows.Err()) + err = rows.Close() if err != nil { c.logger.Error("error while closing rows for reading last offset", slog.Any("error", err)) } diff --git a/flow/e2e/snowflake/peer_flow_sf_test.go b/flow/e2e/snowflake/peer_flow_sf_test.go index 14282d7aa9..00a1768717 100644 --- a/flow/e2e/snowflake/peer_flow_sf_test.go +++ b/flow/e2e/snowflake/peer_flow_sf_test.go @@ -870,7 +870,6 @@ func (s PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF() { s.t.Log("Inserted initial row in the source table") e2e.EnvWaitForEqualTables(env, s, "normalize reinsert", "test_simple_schema_changes", "id,c1") - s.t.Log("Tables equal") expectedTableSchema := &protos.TableSchema{ TableIdentifier: strings.ToUpper(dstTableName), @@ -892,7 +891,6 @@ func (s PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF() { }) e2e.EnvNoError(s.t, env, err) e2e.EnvTrue(s.t, env, e2e.CompareTableSchemas(expectedTableSchema, output.TableNameSchemaMapping[dstTableName])) - s.t.Log("Schemas equal") // alter source table, add column c2 and insert another row. _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` @@ -925,7 +923,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF() { TableIdentifiers: []string{dstTableName}, }) e2e.EnvNoError(s.t, env, err) - e2e.EnvEqual(s.t, env, expectedTableSchema, output.TableNameSchemaMapping[dstTableName]) + e2e.EnvTrue(s.t, env, e2e.CompareTableSchemas(expectedTableSchema, output.TableNameSchemaMapping[dstTableName])) e2e.EnvEqualTables(env, s, "test_simple_schema_changes", "id,c1,c2") // alter source table, add column c3, drop column c2 and insert another row. 
@@ -961,7 +959,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF() { TableIdentifiers: []string{dstTableName}, }) e2e.EnvNoError(s.t, env, err) - e2e.EnvEqual(s.t, env, expectedTableSchema, output.TableNameSchemaMapping[dstTableName]) + e2e.EnvTrue(s.t, env, e2e.CompareTableSchemas(expectedTableSchema, output.TableNameSchemaMapping[dstTableName])) e2e.EnvEqualTables(env, s, "test_simple_schema_changes", "id,c1,c3") // alter source table, drop column c3 and insert another row. @@ -997,7 +995,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF() { TableIdentifiers: []string{dstTableName}, }) e2e.EnvNoError(s.t, env, err) - e2e.EnvEqual(s.t, env, expectedTableSchema, output.TableNameSchemaMapping[dstTableName]) + e2e.EnvTrue(s.t, env, e2e.CompareTableSchemas(expectedTableSchema, output.TableNameSchemaMapping[dstTableName])) e2e.EnvEqualTables(env, s, "test_simple_schema_changes", "id,c1") env.CancelWorkflow() From cd620b90d907247128aeaa2f7ddc3c8034fb4f77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Fri, 5 Jan 2024 19:50:33 +0000 Subject: [PATCH 22/67] don't need waitgroup --- flow/e2e/postgres/peer_flow_pg_test.go | 26 ------------------------- flow/e2e/snowflake/peer_flow_sf_test.go | 24 ++++------------------- flow/e2e/test_utils.go | 15 ++++++++++++-- 3 files changed, 17 insertions(+), 48 deletions(-) diff --git a/flow/e2e/postgres/peer_flow_pg_test.go b/flow/e2e/postgres/peer_flow_pg_test.go index e3a80f50ce..bc6a3c294c 100644 --- a/flow/e2e/postgres/peer_flow_pg_test.go +++ b/flow/e2e/postgres/peer_flow_pg_test.go @@ -3,7 +3,6 @@ package e2e_postgres import ( "context" "fmt" - "sync" "time" "github.com/PeerDB-io/peer-flow/e2e" @@ -158,10 +157,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Schema_Changes_PG() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert and mutate schema repeatedly. 
- wg := sync.WaitGroup{} - wg.Add(1) go func() { - defer wg.Done() // insert first row. e2e.SetupCDCFlowStatusQuery(env, connectionGen) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` @@ -260,7 +256,6 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Schema_Changes_PG() { }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - wg.Wait() } func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_PG() { @@ -298,10 +293,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_PG() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. - wg := sync.WaitGroup{} - wg.Add(1) go func() { - defer wg.Done() e2e.SetupCDCFlowStatusQuery(env, connectionGen) // insert 10 rows into the source table for i := 0; i < 10; i++ { @@ -329,7 +321,6 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_PG() { }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - wg.Wait() } func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_Toast_1_PG() { @@ -451,10 +442,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_Toast_2_PG() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. - wg := sync.WaitGroup{} - wg.Add(1) go func() { - defer wg.Done() e2e.SetupCDCFlowStatusQuery(env, connectionGen) // insert 10 rows into the source table @@ -484,7 +472,6 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_Toast_2_PG() { }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - wg.Wait() } func (s PeerFlowE2ETestSuitePG) Test_PeerDB_Columns() { @@ -592,13 +579,9 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_Basic() { MaxBatchSize: 100, } - wg := sync.WaitGroup{} - wg.Add(1) - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. 
go func() { - defer wg.Done() e2e.SetupCDCFlowStatusQuery(env, connectionGen) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` @@ -629,7 +612,6 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_Basic() { }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, config, &limits, nil) - wg.Wait() // verify our updates and delete happened err = s.comparePGTables(cmpTableName, dstTableName, "id,c1,c2,t") @@ -774,10 +756,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_UD_Same_Batch() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. - wg := sync.WaitGroup{} - wg.Add(1) go func() { - defer wg.Done() e2e.SetupCDCFlowStatusQuery(env, connectionGen) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` @@ -813,7 +792,6 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_UD_Same_Batch() { }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, config, &limits, nil) - wg.Wait() // verify our updates and delete happened require.NoError(s.t, err) @@ -870,10 +848,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_Insert_After_Delete() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert and delete rows in the table. 
- wg := sync.WaitGroup{} - wg.Add(1) go func() { - defer wg.Done() e2e.SetupCDCFlowStatusQuery(env, connectionGen) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` @@ -899,7 +874,6 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_Insert_After_Delete() { }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, config, &limits, nil) - wg.Wait() softDeleteQuery := fmt.Sprintf(` SELECT COUNT(*) FROM %s WHERE "_PEERDB_IS_DELETED"`, diff --git a/flow/e2e/snowflake/peer_flow_sf_test.go b/flow/e2e/snowflake/peer_flow_sf_test.go index 00a1768717..e1ebd6fe41 100644 --- a/flow/e2e/snowflake/peer_flow_sf_test.go +++ b/flow/e2e/snowflake/peer_flow_sf_test.go @@ -152,7 +152,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_Simple_Flow_SF() { require.NoError(s.t, err) limits := peerflow.CDCFlowLimits{ - ExitAfterRecords: 20, + ExitAfterRecords: -1, MaxBatchSize: 100, } @@ -170,21 +170,13 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_Simple_Flow_SF() { e2e.EnvNoError(s.t, env, err) } s.t.Log("Inserted 20 rows into the source table") + e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize table", srcTableName, dstTableName, "id,c1") + + env.CancelWorkflow() }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - // Verify workflow completes without error - require.True(s.t, env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - // allow only continue as new error - require.Contains(s.t, err.Error(), "continue as new") - - count, err := s.sfHelper.CountRows("test_simple_flow_sf") - require.NoError(s.t, err) - require.Equal(s.t, 20, count) - // check the number of rows where _PEERDB_SYNCED_AT is newer than 5 mins ago // it should match the count. 
newerSyncedAtQuery := fmt.Sprintf(` @@ -194,9 +186,6 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_Simple_Flow_SF() { require.NoError(s.t, err) require.Equal(s.t, 20, numNewRows) - // TODO: verify that the data is correctly synced to the destination table - // on the Snowflake side - env.AssertExpectations(s.t) } @@ -858,10 +847,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert and mutate schema repeatedly. - wg := sync.WaitGroup{} - wg.Add(1) go func() { - defer wg.Done() e2e.SetupCDCFlowStatusQuery(env, connectionGen) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` @@ -1002,8 +988,6 @@ func (s PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF() { }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - s.t.Log("--- workflow done ---") - wg.Wait() } func (s PeerFlowE2ETestSuiteSF) Test_Composite_PKey_SF() { diff --git a/flow/e2e/test_utils.go b/flow/e2e/test_utils.go index 43e39d4bac..ffac338d6b 100644 --- a/flow/e2e/test_utils.go +++ b/flow/e2e/test_utils.go @@ -142,17 +142,28 @@ func EnvWaitForEqualTables( reason string, table string, cols string, +) { + EnvWaitForEqualTablesWithNames(env, suite, reason, table, table, cols) +} + +func EnvWaitForEqualTablesWithNames( + env *testsuite.TestWorkflowEnvironment, + suite e2eshared.RowSource, + reason string, + srcTable string, + dstTable string, + cols string, ) { t := suite.T() EnvWaitFor(t, env, time.Minute, reason, func(ctx context.Context) bool { suffix := suite.Suffix() pool := suite.Pool() - pgRows, err := GetPgRows(pool, suffix, table, cols) + pgRows, err := GetPgRows(pool, suffix, srcTable, cols) if err != nil { return false } - rows, err := suite.GetRows(table, cols) + rows, err := suite.GetRows(dstTable, cols) if err != nil { return false } From d47371a98d1e52397cda97dce82ae929209b051b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= 
Date: Fri, 5 Jan 2024 20:09:06 +0000 Subject: [PATCH 23/67] rest of snowflake no longer using normalize flow count --- flow/e2e/bigquery/peer_flow_bq_test.go | 8 +- flow/e2e/snowflake/peer_flow_sf_test.go | 122 +++++++----------------- 2 files changed, 39 insertions(+), 91 deletions(-) diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go index 5da81a3e3e..a3eb89e61e 100644 --- a/flow/e2e/bigquery/peer_flow_bq_test.go +++ b/flow/e2e/bigquery/peer_flow_bq_test.go @@ -1463,7 +1463,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Basic() { e2e.RequireEqualTables(s, "test_softdel", "id,c1,c2,t") newerSyncedAtQuery := fmt.Sprintf(` - SELECT COUNT(*) FROM`+"`%s.%s`"+`WHERE _PEERDB_IS_DELETED = TRUE`, + SELECT COUNT(*) FROM`+"`%s.%s`"+`WHERE _PEERDB_IS_DELETED`, s.bqHelper.datasetName, dstTableName) numNewRows, err := s.bqHelper.RunInt64Query(newerSyncedAtQuery) require.NoError(s.t, err) @@ -1547,7 +1547,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_IUD_Same_Batch() { e2e.RequireEqualTables(s, "test_softdel_iud", "id,c1,c2,t") newerSyncedAtQuery := fmt.Sprintf(` - SELECT COUNT(*) FROM`+"`%s.%s`"+`WHERE _PEERDB_IS_DELETED = TRUE`, + SELECT COUNT(*) FROM`+"`%s.%s`"+`WHERE _PEERDB_IS_DELETED`, s.bqHelper.datasetName, dstTableName) numNewRows, err := s.bqHelper.RunInt64Query(newerSyncedAtQuery) require.NoError(s.t, err) @@ -1635,7 +1635,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_UD_Same_Batch() { e2e.RequireEqualTables(s, "test_softdel_ud", "id,c1,c2,t") newerSyncedAtQuery := fmt.Sprintf(` - SELECT COUNT(*) FROM`+"`%s.%s`"+`WHERE _PEERDB_IS_DELETED = TRUE`, + SELECT COUNT(*) FROM`+"`%s.%s`"+`WHERE _PEERDB_IS_DELETED`, s.bqHelper.datasetName, dstTableName) numNewRows, err := s.bqHelper.RunInt64Query(newerSyncedAtQuery) require.NoError(s.t, err) @@ -1711,7 +1711,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Insert_After_Delete() { e2e.RequireEqualTables(s, "test_softdel_iad", "id,c1,c2,t") newerSyncedAtQuery 
:= fmt.Sprintf(` - SELECT COUNT(*) FROM`+"`%s.%s`"+`WHERE _PEERDB_IS_DELETED = TRUE`, + SELECT COUNT(*) FROM`+"`%s.%s`"+`WHERE _PEERDB_IS_DELETED`, s.bqHelper.datasetName, dstTableName) numNewRows, err := s.bqHelper.RunInt64Query(newerSyncedAtQuery) require.NoError(s.t, err) diff --git a/flow/e2e/snowflake/peer_flow_sf_test.go b/flow/e2e/snowflake/peer_flow_sf_test.go index e1ebd6fe41..531affbd74 100644 --- a/flow/e2e/snowflake/peer_flow_sf_test.go +++ b/flow/e2e/snowflake/peer_flow_sf_test.go @@ -185,8 +185,6 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_Simple_Flow_SF() { numNewRows, err := s.sfHelper.RunIntQuery(newerSyncedAtQuery) require.NoError(s.t, err) require.Equal(s.t, 20, numNewRows) - - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteSF) Test_Flow_ReplicaIdentity_Index_No_Pkey() { @@ -251,8 +249,6 @@ func (s PeerFlowE2ETestSuiteSF) Test_Flow_ReplicaIdentity_Index_No_Pkey() { count, err := s.sfHelper.CountRows("test_replica_identity_no_pkey") require.NoError(s.t, err) require.Equal(s.t, 20, count) - - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteSF) Test_Invalid_Geo_SF_Avro_CDC() { @@ -333,8 +329,6 @@ func (s PeerFlowE2ETestSuiteSF) Test_Invalid_Geo_SF_Avro_CDC() { polyCount, err := s.sfHelper.CountNonNullRows("test_invalid_geo_sf_avro_cdc", "poly") require.NoError(s.t, err) require.Equal(s.t, 6, polyCount) - - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteSF) Test_Toast_SF() { @@ -401,7 +395,6 @@ func (s PeerFlowE2ETestSuiteSF) Test_Toast_SF() { require.Contains(s.t, err.Error(), "continue as new") e2e.RequireEqualTables(s, "test_toast_sf_1", `id,t1,t2,k`) - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteSF) Test_Toast_Nochanges_SF() { @@ -463,7 +456,6 @@ func (s PeerFlowE2ETestSuiteSF) Test_Toast_Nochanges_SF() { require.Contains(s.t, err.Error(), "continue as new") e2e.RequireEqualTables(s, "test_toast_sf_2", `id,t1,t2,k`) - env.AssertExpectations(s.t) wg.Wait() } @@ -537,7 +529,6 @@ func (s 
PeerFlowE2ETestSuiteSF) Test_Toast_Advance_1_SF() { require.Contains(s.t, err.Error(), "continue as new") e2e.RequireEqualTables(s, "test_toast_sf_3", `id,t1,t2,k`) - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteSF) Test_Toast_Advance_2_SF() { @@ -603,7 +594,6 @@ func (s PeerFlowE2ETestSuiteSF) Test_Toast_Advance_2_SF() { require.Contains(s.t, err.Error(), "continue as new") e2e.RequireEqualTables(s, "test_toast_sf_4", `id,t1,k`) - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteSF) Test_Toast_Advance_3_SF() { @@ -669,7 +659,6 @@ func (s PeerFlowE2ETestSuiteSF) Test_Toast_Advance_3_SF() { require.Contains(s.t, err.Error(), "continue as new") e2e.RequireEqualTables(s, "test_toast_sf_5", `id,t1,t2,k`) - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteSF) Test_Types_SF() { @@ -752,8 +741,6 @@ func (s PeerFlowE2ETestSuiteSF) Test_Types_SF() { // Make sure that there are no nulls require.Equal(s.t, noNulls, true) - - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteSF) Test_Multi_Table_SF() { @@ -811,8 +798,6 @@ func (s PeerFlowE2ETestSuiteSF) Test_Multi_Table_SF() { require.Equal(s.t, 1, count1) require.Equal(s.t, 1, count2) - - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF() { @@ -848,7 +833,6 @@ func (s PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert and mutate schema repeatedly. 
go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1) VALUES ($1)`, srcTableName), 1) @@ -1019,7 +1003,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Composite_PKey_SF() { require.NoError(s.t, err) limits := peerflow.CDCFlowLimits{ - ExitAfterRecords: 10, + ExitAfterRecords: -1, MaxBatchSize: 100, } @@ -1037,30 +1021,19 @@ func (s PeerFlowE2ETestSuiteSF) Test_Composite_PKey_SF() { } s.t.Log("Inserted 10 rows into the source table") - // verify we got our 10 rows - e2e.NormalizeFlowCountQuery(env, connectionGen, 2) - e2e.EnvEqualTables(env, s, "test_simple_cpkey", "id,c1,c2,t") + e2e.EnvWaitForEqualTables(env, s, "normalize table", "test_simple_cpkey", "id,c1,c2,t") _, err := s.pool.Exec(context.Background(), fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) e2e.EnvNoError(s.t, env, err) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitForEqualTables(env, s, "normalize update/delete", "test_simple_cpkey", "id,c1,c2,t") + + env.CancelWorkflow() }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - - // Verify workflow completes without error - require.True(s.t, env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - // allow only continue as new error - require.Contains(s.t, err.Error(), "continue as new") - - // verify our updates and delete happened - e2e.RequireEqualTables(s, "test_simple_cpkey", "id,c1,c2,t") - - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_1_SF() { @@ -1135,8 +1108,6 @@ func (s PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_1_SF() { // verify our updates and delete happened e2e.RequireEqualTables(s, "test_cpkey_toast1", "id,c1,c2,t,t2") - - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_2_SF() { @@ -1169,7 +1140,7 
@@ func (s PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_2_SF() { require.NoError(s.t, err) limits := peerflow.CDCFlowLimits{ - ExitAfterRecords: 10, + ExitAfterRecords: -1, MaxBatchSize: 100, } @@ -1188,27 +1159,18 @@ func (s PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_2_SF() { } s.t.Log("Inserted 10 rows into the source table") - e2e.NormalizeFlowCountQuery(env, connectionGen, 2) + e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize table", srcTableName, dstTableName, "id,c2,t,t2") _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) e2e.EnvNoError(s.t, env, err) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize update/delete", srcTableName, dstTableName, "id,c2,t,t2") + + env.CancelWorkflow() }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - - // Verify workflow completes without error - require.True(s.t, env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - // allow only continue as new error - require.Contains(s.t, err.Error(), "continue as new") - - // verify our updates and delete happened - e2e.RequireEqualTables(s, "test_cpkey_toast2", "id,c1,c2,t,t2") - - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteSF) Test_Column_Exclusion() { @@ -1269,18 +1231,18 @@ func (s PeerFlowE2ETestSuiteSF) Test_Column_Exclusion() { } s.t.Log("Inserted 10 rows into the source table") - e2e.NormalizeFlowCountQuery(env, connectionGen, 2) + e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize table", srcTableName, dstTableName, "id,t,t2") _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) e2e.EnvNoError(s.t, env, err) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) 
e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize update/delete", srcTableName, dstTableName, "id,t,t2") + + env.CancelWorkflow() }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, config, &limits, nil) - require.True(s.t, env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - require.Contains(s.t, err.Error(), "continue as new") query := fmt.Sprintf("SELECT * FROM %s.%s.test_exclude_sf ORDER BY id", s.sfHelper.testDatabaseName, s.sfHelper.testSchemaName) @@ -1333,27 +1295,23 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Basic() { } limits := peerflow.CDCFlowLimits{ - ExitAfterRecords: 3, + ExitAfterRecords: -1, MaxBatchSize: 100, } - wg := sync.WaitGroup{} - wg.Add(1) - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. go func() { - defer wg.Done() e2e.SetupCDCFlowStatusQuery(env, connectionGen) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.NormalizeFlowCountQuery(env, connectionGen, 1) + e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize row", srcTableName, dstTableName, "id,c1,c2,t") _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` UPDATE %s SET c1=c1+4 WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.NormalizeFlowCountQuery(env, connectionGen, 2) + e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize update", srcTableName, dstTableName, "id,c1,c2,t") // since we delete stuff, create another table to compare with _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` CREATE TABLE %s AS SELECT * FROM %s`, cmpTableName, srcTableName)) @@ -1361,20 +1319,18 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Basic() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) + 
e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize delete", srcTableName, dstTableName, "id,c1,c2,t") + + env.CancelWorkflow() }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, config, &limits, nil) - require.True(s.t, env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - require.Contains(s.t, err.Error(), "continue as new") - - wg.Wait() // verify our updates and delete happened e2e.RequireEqualTables(s, "test_softdel", "id,c1,c2,t") newerSyncedAtQuery := fmt.Sprintf(` - SELECT COUNT(*) FROM %s WHERE _PEERDB_IS_DELETED = TRUE`, dstTableName) + SELECT COUNT(*) FROM %s WHERE _PEERDB_IS_DELETED`, dstTableName) numNewRows, err := s.sfHelper.RunIntQuery(newerSyncedAtQuery) require.NoError(s.t, err) require.Equal(s.t, 1, numNewRows) @@ -1457,7 +1413,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_IUD_Same_Batch() { e2e.RequireEqualTables(s, "test_softdel_iud", "id,c1,c2,t") newerSyncedAtQuery := fmt.Sprintf(` - SELECT COUNT(*) FROM %s WHERE _PEERDB_IS_DELETED = TRUE`, dstTableName) + SELECT COUNT(*) FROM %s WHERE _PEERDB_IS_DELETED`, dstTableName) numNewRows, err := s.sfHelper.RunIntQuery(newerSyncedAtQuery) require.NoError(s.t, err) require.Equal(s.t, 1, numNewRows) @@ -1502,7 +1458,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_UD_Same_Batch() { } limits := peerflow.CDCFlowLimits{ - ExitAfterRecords: 4, + ExitAfterRecords: -1, MaxBatchSize: 100, } @@ -1514,7 +1470,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_UD_Same_Batch() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.NormalizeFlowCountQuery(env, connectionGen, 1) + e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize insert", srcTableName, dstTableName, "id,c1,c2,t") insertTx, err := s.pool.Begin(context.Background()) e2e.EnvNoError(s.t, env, err) @@ -1533,18 +1489,15 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_UD_Same_Batch() { 
e2e.EnvNoError(s.t, env, err) e2e.EnvNoError(s.t, env, insertTx.Commit(context.Background())) + e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize transaction", srcTableName, dstTableName, "id,c1,c2,t") + + env.CancelWorkflow() }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, config, &limits, nil) - require.True(s.t, env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - require.Contains(s.t, err.Error(), "continue as new") - - // verify our updates and delete happened - e2e.RequireEqualTables(s, "test_softdel_ud", "id,c1,c2,t") newerSyncedAtQuery := fmt.Sprintf(` - SELECT COUNT(*) FROM %s WHERE _PEERDB_IS_DELETED = TRUE`, dstTableName) + SELECT COUNT(*) FROM %s WHERE _PEERDB_IS_DELETED`, dstTableName) numNewRows, err := s.sfHelper.RunIntQuery(newerSyncedAtQuery) require.NoError(s.t, err) require.Equal(s.t, 1, numNewRows) @@ -1588,7 +1541,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Insert_After_Delete() { } limits := peerflow.CDCFlowLimits{ - ExitAfterRecords: 3, + ExitAfterRecords: -1, MaxBatchSize: 100, } @@ -1600,26 +1553,23 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Insert_After_Delete() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.NormalizeFlowCountQuery(env, connectionGen, 1) + e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize row", srcTableName, dstTableName, "id,c1,c2,t") _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.NormalizeFlowCountQuery(env, connectionGen, 2) + e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize delete", srcTableName, dstTableName, "id,c1,c2,t") _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(id,c1,c2,t) VALUES (1,3,4,random_string(10000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize reinsert", 
srcTableName, dstTableName, "id,c1,c2,t") + + env.CancelWorkflow() }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, config, &limits, nil) - require.True(s.t, env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - require.Contains(s.t, err.Error(), "continue as new") - - // verify our updates and delete happened - e2e.RequireEqualTables(s, "test_softdel_iad", "id,c1,c2,t") newerSyncedAtQuery := fmt.Sprintf(` - SELECT COUNT(*) FROM %s WHERE _PEERDB_IS_DELETED = TRUE`, dstTableName) + SELECT COUNT(*) FROM %s WHERE _PEERDB_IS_DELETED`, dstTableName) numNewRows, err := s.sfHelper.RunIntQuery(newerSyncedAtQuery) require.NoError(s.t, err) require.Equal(s.t, 0, numNewRows) @@ -1684,6 +1634,4 @@ func (s PeerFlowE2ETestSuiteSF) Test_Supported_Mixed_Case_Table_SF() { s.compareTableContentsWithDiffSelectorsSF("testMixedCase", `"pulseArmor","highGold","eVe",id`, `"pulseArmor","highGold","eVe",id`, true) - - env.AssertExpectations(s.t) } From 448f9fe907937bd7834c9a0b8dfb8288eb3d7fe3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Fri, 5 Jan 2024 20:09:53 +0000 Subject: [PATCH 24/67] remove env.AssertExpectations --- flow/e2e/bigquery/peer_flow_bq_test.go | 31 ------------------------- flow/e2e/bigquery/qrep_flow_bq_test.go | 4 ---- flow/e2e/postgres/qrep_flow_pg_test.go | 4 ---- flow/e2e/s3/cdc_s3_test.go | 2 -- flow/e2e/s3/qrep_flow_s3_test.go | 4 ---- flow/e2e/snowflake/qrep_flow_sf_test.go | 12 ---------- 6 files changed, 57 deletions(-) diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go index a3eb89e61e..663b84a897 100644 --- a/flow/e2e/bigquery/peer_flow_bq_test.go +++ b/flow/e2e/bigquery/peer_flow_bq_test.go @@ -198,8 +198,6 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Invalid_Connection_Config() { // assert that error contains "invalid connection configs" require.Contains(s.t, err.Error(), "invalid connection configs") - - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteBQ) 
Test_Complete_Flow_No_Data() { @@ -242,8 +240,6 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Complete_Flow_No_Data() { // allow only continue as new error require.Contains(s.t, err.Error(), "continue as new") - - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteBQ) Test_Char_ColType_Error() { @@ -286,8 +282,6 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Char_ColType_Error() { // allow only continue as new error require.Contains(s.t, err.Error(), "continue as new") - - env.AssertExpectations(s.t) } // Test_Complete_Simple_Flow_BQ tests a complete flow with data in the source table. @@ -356,8 +350,6 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Complete_Simple_Flow_BQ() { // TODO: verify that the data is correctly synced to the destination table // on the bigquery side - - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteBQ) Test_Toast_BQ() { @@ -425,7 +417,6 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Toast_BQ() { require.Contains(s.t, err.Error(), "continue as new") e2e.RequireEqualTables(s, dstTableName, "id,t1,t2,k") - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteBQ) Test_Toast_Nochanges_BQ() { @@ -488,7 +479,6 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Toast_Nochanges_BQ() { require.Contains(s.t, err.Error(), "continue as new") e2e.RequireEqualTables(s, dstTableName, "id,t1,t2,k") - env.AssertExpectations(s.t) <-done } @@ -563,7 +553,6 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_1_BQ() { require.Contains(s.t, err.Error(), "continue as new") e2e.RequireEqualTables(s, dstTableName, "id,t1,t2,k") - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_2_BQ() { @@ -630,7 +619,6 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_2_BQ() { require.Contains(s.t, err.Error(), "continue as new") e2e.RequireEqualTables(s, dstTableName, "id,t1,k") - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_3_BQ() { @@ -697,7 +685,6 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_3_BQ() { 
require.Contains(s.t, err.Error(), "continue as new") e2e.RequireEqualTables(s, dstTableName, "id,t1,t2,k") - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteBQ) Test_Types_BQ() { @@ -779,8 +766,6 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Types_BQ() { // check if JSON on bigquery side is a good JSON err = s.checkJSONValue(dstTableName, "c17", "sai", "-8.021390374331551") require.NoError(s.t, err) - - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteBQ) Test_Invalid_Geo_BQ_Avro_CDC() { @@ -863,8 +848,6 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Invalid_Geo_BQ_Avro_CDC() { require.Equal(s.t, 6, lineCount) require.Equal(s.t, 6, polyCount) - - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteBQ) Test_Multi_Table_BQ() { @@ -924,8 +907,6 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Multi_Table_BQ() { require.Equal(s.t, 1, count1) require.Equal(s.t, 1, count2) - - env.AssertExpectations(s.t) } // TODO: not checking schema exactly, add later @@ -1025,8 +1006,6 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Simple_Schema_Changes_BQ() { // allow only continue as new error require.Contains(s.t, err.Error(), "continue as new") - - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_BQ() { @@ -1098,8 +1077,6 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_BQ() { require.Contains(s.t, err.Error(), "continue as new") e2e.RequireEqualTables(s, dstTableName, "id,c1,c2,t") - - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_Toast_1_BQ() { @@ -1175,8 +1152,6 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_Toast_1_BQ() { // verify our updates and delete happened e2e.RequireEqualTables(s, dstTableName, "id,c1,c2,t,t2") - - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_Toast_2_BQ() { @@ -1248,8 +1223,6 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_Toast_2_BQ() { // verify our updates and delete happened e2e.RequireEqualTables(s, dstTableName, 
"id,c1,c2,t,t2") - - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteBQ) Test_Columns_BQ() { @@ -1311,8 +1284,6 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Columns_BQ() { err = s.checkPeerdbColumns(dstTableName, true) require.NoError(s.t, err) - - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteBQ) Test_Multi_Table_Multi_Dataset_BQ() { @@ -1379,8 +1350,6 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Multi_Table_Multi_Dataset_BQ() { err = s.bqHelper.DropDataset(secondDataset) require.NoError(s.t, err) - - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Basic() { diff --git a/flow/e2e/bigquery/qrep_flow_bq_test.go b/flow/e2e/bigquery/qrep_flow_bq_test.go index 24d845c500..6ac9f3be5d 100644 --- a/flow/e2e/bigquery/qrep_flow_bq_test.go +++ b/flow/e2e/bigquery/qrep_flow_bq_test.go @@ -44,8 +44,6 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Complete_QRep_Flow_Avro() { require.NoError(s.t, err) e2e.RequireEqualTables(s, tblName, "*") - - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteBQ) Test_PeerDB_Columns_QRep_BQ() { @@ -79,6 +77,4 @@ func (s PeerFlowE2ETestSuiteBQ) Test_PeerDB_Columns_QRep_BQ() { err = s.checkPeerdbColumns(tblName, false) require.NoError(s.t, err) - - env.AssertExpectations(s.t) } diff --git a/flow/e2e/postgres/qrep_flow_pg_test.go b/flow/e2e/postgres/qrep_flow_pg_test.go index 8c32b69696..c0a848fbd2 100644 --- a/flow/e2e/postgres/qrep_flow_pg_test.go +++ b/flow/e2e/postgres/qrep_flow_pg_test.go @@ -258,8 +258,6 @@ func (s PeerFlowE2ETestSuitePG) Test_Complete_QRep_Flow_Multi_Insert_PG() { err = s.comparePGTables(srcSchemaQualified, dstSchemaQualified, "*") require.NoError(s.t, err) - - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuitePG) Test_Setup_Destination_And_PeerDB_Columns_QRep_PG() { @@ -303,6 +301,4 @@ func (s PeerFlowE2ETestSuitePG) Test_Setup_Destination_And_PeerDB_Columns_QRep_P err = s.checkSyncedAt(dstSchemaQualified) require.NoError(s.t, err) - - env.AssertExpectations(s.t) } diff 
--git a/flow/e2e/s3/cdc_s3_test.go b/flow/e2e/s3/cdc_s3_test.go index 69e8f2cb40..4c5d8feb94 100644 --- a/flow/e2e/s3/cdc_s3_test.go +++ b/flow/e2e/s3/cdc_s3_test.go @@ -81,6 +81,4 @@ func (s PeerFlowE2ETestSuiteS3) Test_Complete_Simple_Flow_S3() { require.NoError(s.t, err) require.Equal(s.t, 4, len(files)) - - env.AssertExpectations(s.t) } diff --git a/flow/e2e/s3/qrep_flow_s3_test.go b/flow/e2e/s3/qrep_flow_s3_test.go index 68cc49fb44..6eaa2ebc31 100644 --- a/flow/e2e/s3/qrep_flow_s3_test.go +++ b/flow/e2e/s3/qrep_flow_s3_test.go @@ -135,8 +135,6 @@ func (s PeerFlowE2ETestSuiteS3) Test_Complete_QRep_Flow_S3() { require.NoError(s.t, err) require.Equal(s.t, 1, len(files)) - - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteS3) Test_Complete_QRep_Flow_S3_CTID() { @@ -186,6 +184,4 @@ func (s PeerFlowE2ETestSuiteS3) Test_Complete_QRep_Flow_S3_CTID() { require.NoError(s.t, err) require.Equal(s.t, 10, len(files)) - - env.AssertExpectations(s.t) } diff --git a/flow/e2e/snowflake/qrep_flow_sf_test.go b/flow/e2e/snowflake/qrep_flow_sf_test.go index 3c032e35f3..e14bf4ec0b 100644 --- a/flow/e2e/snowflake/qrep_flow_sf_test.go +++ b/flow/e2e/snowflake/qrep_flow_sf_test.go @@ -87,8 +87,6 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF() { err = s.checkJSONValue(dstSchemaQualified, "f7", "key", "\"value\"") require.NoError(s.t, err) - - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_Upsert_Simple() { @@ -132,8 +130,6 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_Upsert_Simple() sel := e2e.GetOwnersSelectorStringsSF() s.compareTableContentsWithDiffSelectorsSF(tblName, sel[0], sel[1], false) - - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_S3() { @@ -174,8 +170,6 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_S3() { sel := e2e.GetOwnersSelectorStringsSF() s.compareTableContentsWithDiffSelectorsSF(tblName, sel[0], 
sel[1], false) - - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_Upsert_XMIN() { @@ -220,8 +214,6 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_Upsert_XMIN() { sel := e2e.GetOwnersSelectorStringsSF() s.compareTableContentsWithDiffSelectorsSF(tblName, sel[0], sel[1], false) - - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_S3_Integration() { @@ -265,8 +257,6 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_S3_Integration() sel := e2e.GetOwnersSelectorStringsSF() s.compareTableContentsWithDiffSelectorsSF(tblName, sel[0], sel[1], false) - - env.AssertExpectations(s.t) } func (s PeerFlowE2ETestSuiteSF) Test_PeerDB_Columns_QRep_SF() { @@ -311,6 +301,4 @@ func (s PeerFlowE2ETestSuiteSF) Test_PeerDB_Columns_QRep_SF() { err = s.sfHelper.checkSyncedAt(fmt.Sprintf(`SELECT "_PEERDB_SYNCED_AT" FROM %s.%s`, s.sfHelper.testSchemaName, tblName)) require.NoError(s.t, err) - - env.AssertExpectations(s.t) } From 028d0fd70fb50a99223acfafde0c2002a9e47225 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Fri, 5 Jan 2024 20:31:06 +0000 Subject: [PATCH 25/67] try fixing soft delete mixup --- flow/e2e/snowflake/peer_flow_sf_test.go | 41 ++++++++++++++++--------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/flow/e2e/snowflake/peer_flow_sf_test.go b/flow/e2e/snowflake/peer_flow_sf_test.go index 531affbd74..7a03f25188 100644 --- a/flow/e2e/snowflake/peer_flow_sf_test.go +++ b/flow/e2e/snowflake/peer_flow_sf_test.go @@ -1275,7 +1275,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Basic() { require.NoError(s.t, err) connectionGen := e2e.FlowConnectionGenerationConfig{ - FlowJobName: s.attachSuffix("test_softdel"), + FlowJobName: cmpTableName, } config := &protos.FlowConnectionConfigs{ @@ -1312,23 +1312,23 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Basic() { UPDATE %s SET c1=c1+4 WHERE id=1`, srcTableName)) 
e2e.EnvNoError(s.t, env, err) e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize update", srcTableName, dstTableName, "id,c1,c2,t") - // since we delete stuff, create another table to compare with - _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` - CREATE TABLE %s AS SELECT * FROM %s`, cmpTableName, srcTableName)) - e2e.EnvNoError(s.t, env, err) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize delete", srcTableName, dstTableName, "id,c1,c2,t") + e2e.EnvWaitForEqualTablesWithNames( + env, + s, + "normalize delete", + srcTableName, + dstTableName+" WHERE NOT _PEERDB_IS_DELETED", + "id,c1,c2,t", + ) env.CancelWorkflow() }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, config, &limits, nil) - // verify our updates and delete happened - e2e.RequireEqualTables(s, "test_softdel", "id,c1,c2,t") - newerSyncedAtQuery := fmt.Sprintf(` SELECT COUNT(*) FROM %s WHERE _PEERDB_IS_DELETED`, dstTableName) numNewRows, err := s.sfHelper.RunIntQuery(newerSyncedAtQuery) @@ -1480,16 +1480,19 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_UD_Same_Batch() { _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` UPDATE %s SET c1=c1+4 WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) - // since we delete stuff, create another table to compare with - _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` - CREATE TABLE %s AS SELECT * FROM %s`, cmpTableName, srcTableName)) - e2e.EnvNoError(s.t, env, err) _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) e2e.EnvNoError(s.t, env, insertTx.Commit(context.Background())) - e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize transaction", srcTableName, dstTableName, "id,c1,c2,t") + e2e.EnvWaitForEqualTablesWithNames( + env, + s, + "normalize transaction", + srcTableName, + dstTableName+" 
WHERE NOT _PEERDB_IS_DELETED", + "id,c1,c2,t", + ) env.CancelWorkflow() }() @@ -1557,7 +1560,15 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Insert_After_Delete() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize delete", srcTableName, dstTableName, "id,c1,c2,t") + e2e.EnvWaitForEqualTablesWithNames( + env, + s, + "normalize delete", + srcTableName, + dstTableName+" WHERE NOT _PEERDB_IS_DELETED", + "id,c1,c2,t", + ) + _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(id,c1,c2,t) VALUES (1,3,4,random_string(10000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) From e574110afce60876704ef6b29e103fe2b3f12240 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Fri, 5 Jan 2024 20:48:07 +0000 Subject: [PATCH 26/67] clean up no longer needed cmpTableName --- flow/e2e/postgres/peer_flow_pg_test.go | 4 ---- flow/e2e/snowflake/peer_flow_sf_test.go | 8 +++----- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/flow/e2e/postgres/peer_flow_pg_test.go b/flow/e2e/postgres/peer_flow_pg_test.go index bc6a3c294c..1cc897638f 100644 --- a/flow/e2e/postgres/peer_flow_pg_test.go +++ b/flow/e2e/postgres/peer_flow_pg_test.go @@ -774,10 +774,6 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_UD_Same_Batch() { _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` UPDATE %s SET c1=c1+4 WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) - // since we delete stuff, create another table to compare with - _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` - CREATE TABLE %s AS SELECT * FROM %s`, cmpTableName, srcTableName)) - e2e.EnvNoError(s.t, env, err) _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) diff --git a/flow/e2e/snowflake/peer_flow_sf_test.go b/flow/e2e/snowflake/peer_flow_sf_test.go
index 7a03f25188..070e66f8a0 100644 --- a/flow/e2e/snowflake/peer_flow_sf_test.go +++ b/flow/e2e/snowflake/peer_flow_sf_test.go @@ -1260,8 +1260,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Basic() { env := e2e.NewTemporalTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(s.t, env) - cmpTableName := s.attachSchemaSuffix("test_softdel") - srcTableName := fmt.Sprintf("%s_src", cmpTableName) + srcTableName := s.attachSchemaSuffix("test_softdel_src") dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_softdel") _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` @@ -1275,7 +1274,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Basic() { require.NoError(s.t, err) connectionGen := e2e.FlowConnectionGenerationConfig{ - FlowJobName: cmpTableName, + FlowJobName: srcTableName, } config := &protos.FlowConnectionConfigs{ @@ -1423,8 +1422,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_UD_Same_Batch() { env := e2e.NewTemporalTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(s.t, env) - cmpTableName := s.attachSchemaSuffix("test_softdel_ud") - srcTableName := fmt.Sprintf("%s_src", cmpTableName) + srcTableName := s.attachSchemaSuffix("test_softdel_ud_src") dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_softdel_ud") _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` From d9b68ed5d924634bb7def6fe142c8825ba3c903c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Fri, 5 Jan 2024 21:10:37 +0000 Subject: [PATCH 27/67] equaltables already handles schema --- flow/e2e/snowflake/peer_flow_sf_test.go | 40 +++++++++++++------------ 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/flow/e2e/snowflake/peer_flow_sf_test.go b/flow/e2e/snowflake/peer_flow_sf_test.go index 070e66f8a0..15e69b7e2a 100644 --- a/flow/e2e/snowflake/peer_flow_sf_test.go +++ b/flow/e2e/snowflake/peer_flow_sf_test.go @@ -1177,7 +1177,8 @@ func (s PeerFlowE2ETestSuiteSF) Test_Column_Exclusion() { 
env := e2e.NewTemporalTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(s.t, env) - srcTableName := s.attachSchemaSuffix("test_exclude_sf") + tableName := "test_exclude_sf" + srcTableName := s.attachSchemaSuffix(tableName) dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_exclude_sf") _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` @@ -1212,7 +1213,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Column_Exclusion() { } limits := peerflow.CDCFlowLimits{ - ExitAfterRecords: 10, + ExitAfterRecords: -1, MaxBatchSize: 100, } @@ -1231,22 +1232,20 @@ func (s PeerFlowE2ETestSuiteSF) Test_Column_Exclusion() { } s.t.Log("Inserted 10 rows into the source table") - e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize table", srcTableName, dstTableName, "id,t,t2") + e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize table", tableName, dstTableName, "id,t,t2") _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) e2e.EnvNoError(s.t, env, err) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize update/delete", srcTableName, dstTableName, "id,t,t2") + e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize update/delete", tableName, dstTableName, "id,t,t2") env.CancelWorkflow() }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, config, &limits, nil) - query := fmt.Sprintf("SELECT * FROM %s.%s.test_exclude_sf ORDER BY id", - s.sfHelper.testDatabaseName, s.sfHelper.testSchemaName) - sfRows, err := s.sfHelper.ExecuteAndProcessQuery(query) + sfRows, err := s.GetRows("*", "test_exclude_sf") require.NoError(s.t, err) for _, field := range sfRows.Schema.Fields { @@ -1260,7 +1259,8 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Basic() { env := e2e.NewTemporalTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(s.t, env) - 
srcTableName := s.attachSchemaSuffix("test_softdel_src") + tableName := "test_softdel_src" + srcTableName := s.attachSchemaSuffix(tableName) dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_softdel") _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` @@ -1306,11 +1306,11 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Basic() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize row", srcTableName, dstTableName, "id,c1,c2,t") + e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize row", tableName, dstTableName, "id,c1,c2,t") _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` UPDATE %s SET c1=c1+4 WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize update", srcTableName, dstTableName, "id,c1,c2,t") + e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize update", tableName, dstTableName, "id,c1,c2,t") _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) @@ -1318,7 +1318,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Basic() { env, s, "normalize delete", - srcTableName, + tableName, dstTableName+" WHERE NOT _PEERDB_IS_DELETED", "id,c1,c2,t", ) @@ -1422,7 +1422,8 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_UD_Same_Batch() { env := e2e.NewTemporalTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(s.t, env) - srcTableName := s.attachSchemaSuffix("test_softdel_ud_src") + tableName := "test_softdel_ud_src" + srcTableName := s.attachSchemaSuffix(tableName) dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_softdel_ud") _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` @@ -1468,7 +1469,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_UD_Same_Batch() { _, err = 
s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize insert", srcTableName, dstTableName, "id,c1,c2,t") + e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize insert", tableName, dstTableName, "id,c1,c2,t") insertTx, err := s.pool.Begin(context.Background()) e2e.EnvNoError(s.t, env, err) @@ -1487,7 +1488,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_UD_Same_Batch() { env, s, "normalize transaction", - srcTableName, + tableName, dstTableName+" WHERE NOT _PEERDB_IS_DELETED", "id,c1,c2,t", ) @@ -1508,7 +1509,8 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Insert_After_Delete() { env := e2e.NewTemporalTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(s.t, env) - srcTableName := s.attachSchemaSuffix("test_softdel_iad") + tableName := "test_softdel_iad" + srcTableName := s.attachSchemaSuffix(tableName) dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_softdel_iad") _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` @@ -1522,7 +1524,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Insert_After_Delete() { require.NoError(s.t, err) connectionGen := e2e.FlowConnectionGenerationConfig{ - FlowJobName: s.attachSuffix("test_softdel_iad"), + FlowJobName: srcTableName, } config := &protos.FlowConnectionConfigs{ @@ -1554,7 +1556,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Insert_After_Delete() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize row", srcTableName, dstTableName, "id,c1,c2,t") + e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize row", tableName, dstTableName, "id,c1,c2,t") _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) 
e2e.EnvNoError(s.t, env, err) @@ -1562,7 +1564,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Insert_After_Delete() { env, s, "normalize delete", - srcTableName, + tableName, dstTableName+" WHERE NOT _PEERDB_IS_DELETED", "id,c1,c2,t", ) @@ -1570,7 +1572,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Insert_After_Delete() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(id,c1,c2,t) VALUES (1,3,4,random_string(10000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize reinsert", srcTableName, dstTableName, "id,c1,c2,t") + e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize reinsert", tableName, dstTableName, "id,c1,c2,t") env.CancelWorkflow() }() From 86ca8b9f4365bf0a37aa8c379dc3ee25c6c2aa87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Fri, 5 Jan 2024 21:37:49 +0000 Subject: [PATCH 28/67] more tablename cleanup --- flow/e2e/snowflake/peer_flow_sf_test.go | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/flow/e2e/snowflake/peer_flow_sf_test.go b/flow/e2e/snowflake/peer_flow_sf_test.go index 15e69b7e2a..6314afa38e 100644 --- a/flow/e2e/snowflake/peer_flow_sf_test.go +++ b/flow/e2e/snowflake/peer_flow_sf_test.go @@ -129,8 +129,9 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_Simple_Flow_SF() { env := e2e.NewTemporalTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(s.t, env) - srcTableName := s.attachSchemaSuffix("test_simple_flow_sf") - dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_simple_flow_sf") + tableName := "test_simple_flow_sf" + srcTableName := s.attachSchemaSuffix(tableName) + dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, tableName) _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` CREATE TABLE IF NOT EXISTS %s ( @@ -170,7 +171,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_Simple_Flow_SF() { e2e.EnvNoError(s.t, env, err) } s.t.Log("Inserted 20 
rows into the source table") - e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize table", srcTableName, dstTableName, "id,c1") + e2e.EnvWaitForEqualTables(env, s, "normalize table", tableName, "id,c1") env.CancelWorkflow() }() @@ -1114,8 +1115,9 @@ func (s PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_2_SF() { env := e2e.NewTemporalTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(s.t, env) - srcTableName := s.attachSchemaSuffix("test_cpkey_toast2") - dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_cpkey_toast2") + tableName := "test_cpkey_toast2" + srcTableName := s.attachSchemaSuffix(tableName) + dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, tableName) _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` CREATE TABLE IF NOT EXISTS %s ( @@ -1130,7 +1132,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_2_SF() { require.NoError(s.t, err) connectionGen := e2e.FlowConnectionGenerationConfig{ - FlowJobName: s.attachSuffix("test_cpkey_toast2_flow"), + FlowJobName: srcTableName, TableNameMapping: map[string]string{srcTableName: dstTableName}, PostgresPort: e2e.PostgresPort, Destination: s.sfHelper.Peer, @@ -1159,13 +1161,13 @@ func (s PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_2_SF() { } s.t.Log("Inserted 10 rows into the source table") - e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize table", srcTableName, dstTableName, "id,c2,t,t2") + e2e.EnvWaitForEqualTables(env, s, "normalize table", tableName, "id,c2,t,t2") _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) e2e.EnvNoError(s.t, env, err) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize update/delete", srcTableName, dstTableName, "id,c2,t,t2") + e2e.EnvWaitForEqualTables(env, s, "normalize update/delete", tableName, 
"id,c2,t,t2") env.CancelWorkflow() }() From 758b587dc7e730e04e7e61dd25bbc7c6eb88a2a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Fri, 5 Jan 2024 21:47:47 +0000 Subject: [PATCH 29/67] fix softdel_iad, convert mixedcase --- flow/e2e/snowflake/peer_flow_sf_test.go | 26 ++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/flow/e2e/snowflake/peer_flow_sf_test.go b/flow/e2e/snowflake/peer_flow_sf_test.go index 6314afa38e..2f84c130d1 100644 --- a/flow/e2e/snowflake/peer_flow_sf_test.go +++ b/flow/e2e/snowflake/peer_flow_sf_test.go @@ -1513,7 +1513,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Insert_After_Delete() { tableName := "test_softdel_iad" srcTableName := s.attachSchemaSuffix(tableName) - dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_softdel_iad") + dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, tableName) _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` CREATE TABLE IF NOT EXISTS %s ( @@ -1574,7 +1574,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Insert_After_Delete() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(id,c1,c2,t) VALUES (1,3,4,random_string(10000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize reinsert", tableName, dstTableName, "id,c1,c2,t") + e2e.EnvWaitForEqualTables(env, s, "normalize reinsert", tableName, "id,c1,c2,t") env.CancelWorkflow() }() @@ -1616,7 +1616,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Supported_Mixed_Case_Table_SF() { require.NoError(s.t, err) limits := peerflow.CDCFlowLimits{ - ExitAfterRecords: 20, + ExitAfterRecords: -1, MaxBatchSize: 100, } @@ -1634,17 +1634,17 @@ func (s PeerFlowE2ETestSuiteSF) Test_Supported_Mixed_Case_Table_SF() { e2e.EnvNoError(s.t, env, err) } s.t.Log("Inserted 20 rows into the source table") + e2e.EnvWaitForEqualTablesWithNames( + env, + s, + "normalize mixed case", + "testMixedCase", + 
"\"testMixedCase\"", + "id,\"pulseArmor\",\"highGold\",\"eVe\"", + ) + + env.CancelWorkflow() }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - - // Verify workflow completes without error - require.True(s.t, env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - // allow only continue as new error - require.Contains(s.t, err.Error(), "continue as new") - - s.compareTableContentsWithDiffSelectorsSF("testMixedCase", `"pulseArmor","highGold","eVe",id`, - `"pulseArmor","highGold","eVe",id`, true) } From fbcc64c6de48064a1458ff9b50cb9528383429fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Fri, 5 Jan 2024 22:02:23 +0000 Subject: [PATCH 30/67] more table name fixing --- flow/e2e/snowflake/peer_flow_sf_test.go | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/flow/e2e/snowflake/peer_flow_sf_test.go b/flow/e2e/snowflake/peer_flow_sf_test.go index 2f84c130d1..16473f5b95 100644 --- a/flow/e2e/snowflake/peer_flow_sf_test.go +++ b/flow/e2e/snowflake/peer_flow_sf_test.go @@ -1181,7 +1181,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Column_Exclusion() { tableName := "test_exclude_sf" srcTableName := s.attachSchemaSuffix(tableName) - dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_exclude_sf") + dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, tableName) _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` CREATE TABLE IF NOT EXISTS %s ( @@ -1234,13 +1234,13 @@ func (s PeerFlowE2ETestSuiteSF) Test_Column_Exclusion() { } s.t.Log("Inserted 10 rows into the source table") - e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize table", tableName, dstTableName, "id,t,t2") + e2e.EnvWaitForEqualTables(env, s, "normalize table", tableName, "id,t,t2") _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) e2e.EnvNoError(s.t, env, err) _, err = s.pool.Exec(context.Background(), 
fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize update/delete", tableName, dstTableName, "id,t,t2") + e2e.EnvWaitForEqualTables(env, s, "normalize update/delete", tableName, "id,t,t2") env.CancelWorkflow() }() @@ -1262,8 +1262,9 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Basic() { e2e.RegisterWorkflowsAndActivities(s.t, env) tableName := "test_softdel_src" + dstName := "test_softdel" srcTableName := s.attachSchemaSuffix(tableName) - dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_softdel") + dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, dstName) _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` CREATE TABLE IF NOT EXISTS %s ( @@ -1308,11 +1309,11 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Basic() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize row", tableName, dstTableName, "id,c1,c2,t") + e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize row", tableName, dstName, "id,c1,c2,t") _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` UPDATE %s SET c1=c1+4 WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize update", tableName, dstTableName, "id,c1,c2,t") + e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize update", tableName, dstName, "id,c1,c2,t") _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) @@ -1321,7 +1322,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Basic() { s, "normalize delete", tableName, - dstTableName+" WHERE NOT _PEERDB_IS_DELETED", + dstName+" WHERE NOT _PEERDB_IS_DELETED", "id,c1,c2,t", ) @@ -1425,8 +1426,9 @@ func (s PeerFlowE2ETestSuiteSF) 
Test_Soft_Delete_UD_Same_Batch() { e2e.RegisterWorkflowsAndActivities(s.t, env) tableName := "test_softdel_ud_src" + dstName := "test_softdel_ud" srcTableName := s.attachSchemaSuffix(tableName) - dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_softdel_ud") + dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, dstName) _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` CREATE TABLE IF NOT EXISTS %s ( @@ -1471,7 +1473,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_UD_Same_Batch() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize insert", tableName, dstTableName, "id,c1,c2,t") + e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize insert", tableName, dstName, "id,c1,c2,t") insertTx, err := s.pool.Begin(context.Background()) e2e.EnvNoError(s.t, env, err) @@ -1491,7 +1493,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_UD_Same_Batch() { s, "normalize transaction", tableName, - dstTableName+" WHERE NOT _PEERDB_IS_DELETED", + dstName+" WHERE NOT _PEERDB_IS_DELETED", "id,c1,c2,t", ) From c1ddff74af031b55bfddfa5acb35fc0fde9d5199 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Fri, 5 Jan 2024 22:22:06 +0000 Subject: [PATCH 31/67] fix error reporting, fix flow names --- flow/e2e/postgres/peer_flow_pg_test.go | 3 +-- flow/e2e/snowflake/peer_flow_sf_test.go | 8 ++++---- flow/e2e/test_utils.go | 4 ++++ 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/flow/e2e/postgres/peer_flow_pg_test.go b/flow/e2e/postgres/peer_flow_pg_test.go index 1cc897638f..73da88436f 100644 --- a/flow/e2e/postgres/peer_flow_pg_test.go +++ b/flow/e2e/postgres/peer_flow_pg_test.go @@ -750,7 +750,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_UD_Same_Batch() { } limits := peerflow.CDCFlowLimits{ - ExitAfterRecords: 4, + ExitAfterRecords: -1, 
MaxBatchSize: 100, } @@ -777,7 +777,6 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_UD_Same_Batch() { _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvNoError(s.t, env, insertTx.Commit(context.Background())) e2e.EnvWaitFor(s.t, env, time.Minute, "normalize transaction", func(ctx context.Context) bool { diff --git a/flow/e2e/snowflake/peer_flow_sf_test.go b/flow/e2e/snowflake/peer_flow_sf_test.go index 16473f5b95..095efa2612 100644 --- a/flow/e2e/snowflake/peer_flow_sf_test.go +++ b/flow/e2e/snowflake/peer_flow_sf_test.go @@ -143,7 +143,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_Simple_Flow_SF() { require.NoError(s.t, err) connectionGen := e2e.FlowConnectionGenerationConfig{ - FlowJobName: s.attachSuffix("test_simple_flow"), + FlowJobName: srcTableName, TableNameMapping: map[string]string{srcTableName: dstTableName}, PostgresPort: e2e.PostgresPort, Destination: s.sfHelper.Peer, @@ -171,7 +171,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_Simple_Flow_SF() { e2e.EnvNoError(s.t, env, err) } s.t.Log("Inserted 20 rows into the source table") - e2e.EnvWaitForEqualTables(env, s, "normalize table", tableName, "id,c1") + e2e.EnvWaitForEqualTables(env, s, "normalize table", tableName, "id,key,value") env.CancelWorkflow() }() @@ -208,7 +208,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Flow_ReplicaIdentity_Index_No_Pkey() { require.NoError(s.t, err) connectionGen := e2e.FlowConnectionGenerationConfig{ - FlowJobName: s.attachSuffix("test_simple_flow"), + FlowJobName: srcTableName, TableNameMapping: map[string]string{srcTableName: dstTableName}, PostgresPort: e2e.PostgresPort, Destination: s.sfHelper.Peer, @@ -269,7 +269,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Invalid_Geo_SF_Avro_CDC() { require.NoError(s.t, err) connectionGen := e2e.FlowConnectionGenerationConfig{ - FlowJobName: s.attachSuffix("test_invalid_geo_sf_avro_cdc"), + FlowJobName: srcTableName, 
TableNameMapping: map[string]string{srcTableName: dstTableName}, PostgresPort: e2e.PostgresPort, Destination: s.sfHelper.Peer, diff --git a/flow/e2e/test_utils.go b/flow/e2e/test_utils.go index ffac338d6b..3c4f7b476e 100644 --- a/flow/e2e/test_utils.go +++ b/flow/e2e/test_utils.go @@ -143,6 +143,8 @@ func EnvWaitForEqualTables( table string, cols string, ) { + suite.T().Helper() + EnvWaitForEqualTablesWithNames(env, suite, reason, table, table, cols) } @@ -155,6 +157,8 @@ func EnvWaitForEqualTablesWithNames( cols string, ) { t := suite.T() + t.Helper() + EnvWaitFor(t, env, time.Minute, reason, func(ctx context.Context) bool { suffix := suite.Suffix() pool := suite.Pool() From 1b989fa53157a326bf2923dfff067bfdf865de22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Fri, 5 Jan 2024 23:05:50 +0000 Subject: [PATCH 32/67] remove no longer used parameter --- flow/e2e/snowflake/qrep_flow_sf_test.go | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/flow/e2e/snowflake/qrep_flow_sf_test.go b/flow/e2e/snowflake/qrep_flow_sf_test.go index e14bf4ec0b..b23ec8bb67 100644 --- a/flow/e2e/snowflake/qrep_flow_sf_test.go +++ b/flow/e2e/snowflake/qrep_flow_sf_test.go @@ -32,15 +32,10 @@ func (s PeerFlowE2ETestSuiteSF) checkJSONValue(tableName, colName, fieldName, va return nil } -func (s PeerFlowE2ETestSuiteSF) compareTableContentsWithDiffSelectorsSF(tableName, pgSelector, sfSelector string, - tableCaseSensitive bool, -) { +func (s PeerFlowE2ETestSuiteSF) compareTableContentsWithDiffSelectorsSF(tableName, pgSelector, sfSelector string) { pgRows, err := e2e.GetPgRows(s.pool, s.pgSuffix, tableName, pgSelector) require.NoError(s.t, err) - if tableCaseSensitive { - tableName = fmt.Sprintf("\"%s\"", tableName) - } sfRows, err := s.GetRows(tableName, sfSelector) require.NoError(s.t, err) @@ -83,7 +78,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF() { require.NoError(s.t, err) sel := e2e.GetOwnersSelectorStringsSF() - 
s.compareTableContentsWithDiffSelectorsSF(tblName, sel[0], sel[1], false) + s.compareTableContentsWithDiffSelectorsSF(tblName, sel[0], sel[1]) err = s.checkJSONValue(dstSchemaQualified, "f7", "key", "\"value\"") require.NoError(s.t, err) @@ -129,7 +124,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_Upsert_Simple() require.NoError(s.t, err) sel := e2e.GetOwnersSelectorStringsSF() - s.compareTableContentsWithDiffSelectorsSF(tblName, sel[0], sel[1], false) + s.compareTableContentsWithDiffSelectorsSF(tblName, sel[0], sel[1]) } func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_S3() { @@ -169,7 +164,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_S3() { require.NoError(s.t, err) sel := e2e.GetOwnersSelectorStringsSF() - s.compareTableContentsWithDiffSelectorsSF(tblName, sel[0], sel[1], false) + s.compareTableContentsWithDiffSelectorsSF(tblName, sel[0], sel[1]) } func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_Upsert_XMIN() { @@ -213,7 +208,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_Upsert_XMIN() { require.NoError(s.t, err) sel := e2e.GetOwnersSelectorStringsSF() - s.compareTableContentsWithDiffSelectorsSF(tblName, sel[0], sel[1], false) + s.compareTableContentsWithDiffSelectorsSF(tblName, sel[0], sel[1]) } func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_S3_Integration() { @@ -256,7 +251,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_S3_Integration() require.NoError(s.t, err) sel := e2e.GetOwnersSelectorStringsSF() - s.compareTableContentsWithDiffSelectorsSF(tblName, sel[0], sel[1], false) + s.compareTableContentsWithDiffSelectorsSF(tblName, sel[0], sel[1]) } func (s PeerFlowE2ETestSuiteSF) Test_PeerDB_Columns_QRep_SF() { From e6c9a27eae677c9e80f127c80724501e40cbd5de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Fri, 5 Jan 2024 23:31:25 +0000 Subject: [PATCH 33/67] fix flowjobname --- 
flow/e2e/snowflake/peer_flow_sf_test.go | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/flow/e2e/snowflake/peer_flow_sf_test.go b/flow/e2e/snowflake/peer_flow_sf_test.go index 095efa2612..dedc9f60a0 100644 --- a/flow/e2e/snowflake/peer_flow_sf_test.go +++ b/flow/e2e/snowflake/peer_flow_sf_test.go @@ -143,7 +143,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_Simple_Flow_SF() { require.NoError(s.t, err) connectionGen := e2e.FlowConnectionGenerationConfig{ - FlowJobName: srcTableName, + FlowJobName: s.attachSuffix(srcTableName), TableNameMapping: map[string]string{srcTableName: dstTableName}, PostgresPort: e2e.PostgresPort, Destination: s.sfHelper.Peer, @@ -192,7 +192,8 @@ func (s PeerFlowE2ETestSuiteSF) Test_Flow_ReplicaIdentity_Index_No_Pkey() { env := e2e.NewTemporalTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(s.t, env) - srcTableName := s.attachSchemaSuffix("test_replica_identity_no_pkey") + tableName := "test_replica_identity_no_pkey" + srcTableName := s.attachSchemaSuffix(tableName) dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_replica_identity_no_pkey") // Create a table without a primary key and create a named unique index @@ -208,7 +209,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Flow_ReplicaIdentity_Index_No_Pkey() { require.NoError(s.t, err) connectionGen := e2e.FlowConnectionGenerationConfig{ - FlowJobName: srcTableName, + FlowJobName: s.attachSuffix(tableName), TableNameMapping: map[string]string{srcTableName: dstTableName}, PostgresPort: e2e.PostgresPort, Destination: s.sfHelper.Peer, @@ -256,7 +257,8 @@ func (s PeerFlowE2ETestSuiteSF) Test_Invalid_Geo_SF_Avro_CDC() { env := e2e.NewTemporalTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(s.t, env) - srcTableName := s.attachSchemaSuffix("test_invalid_geo_sf_avro_cdc") + tableName := "test_invalid_geo_sf_avro_cdc" + srcTableName := s.attachSchemaSuffix(tableName) dstTableName := fmt.Sprintf("%s.%s", 
s.sfHelper.testSchemaName, "test_invalid_geo_sf_avro_cdc") _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` @@ -269,7 +271,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Invalid_Geo_SF_Avro_CDC() { require.NoError(s.t, err) connectionGen := e2e.FlowConnectionGenerationConfig{ - FlowJobName: srcTableName, + FlowJobName: s.attachSuffix(tableName), TableNameMapping: map[string]string{srcTableName: dstTableName}, PostgresPort: e2e.PostgresPort, Destination: s.sfHelper.Peer, @@ -1132,7 +1134,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_2_SF() { require.NoError(s.t, err) connectionGen := e2e.FlowConnectionGenerationConfig{ - FlowJobName: srcTableName, + FlowJobName: s.attachSuffix(tableName), TableNameMapping: map[string]string{srcTableName: dstTableName}, PostgresPort: e2e.PostgresPort, Destination: s.sfHelper.Peer, @@ -1277,7 +1279,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Basic() { require.NoError(s.t, err) connectionGen := e2e.FlowConnectionGenerationConfig{ - FlowJobName: srcTableName, + FlowJobName: s.attachSuffix(dstName), } config := &protos.FlowConnectionConfigs{ @@ -1528,7 +1530,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Insert_After_Delete() { require.NoError(s.t, err) connectionGen := e2e.FlowConnectionGenerationConfig{ - FlowJobName: srcTableName, + FlowJobName: s.attachSuffix(tableName), } config := &protos.FlowConnectionConfigs{ From 544f59c69c7015669a6be696bcdb8d8523f2cbc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Fri, 5 Jan 2024 23:43:23 +0000 Subject: [PATCH 34/67] fix missed waitfor fix --- flow/e2e/bigquery/peer_flow_bq_test.go | 1 + flow/e2e/snowflake/peer_flow_sf_test.go | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go index 663b84a897..00e084b429 100644 --- a/flow/e2e/bigquery/peer_flow_bq_test.go +++ b/flow/e2e/bigquery/peer_flow_bq_test.go @@ -42,6 +42,7 @@ func (s 
PeerFlowE2ETestSuiteBQ) Suffix() string { } func (s PeerFlowE2ETestSuiteBQ) GetRows(tableName string, colsString string) (*model.QRecordBatch, error) { + s.t.Helper() qualifiedTableName := fmt.Sprintf("`%s.%s`", s.bqHelper.Config.DatasetId, tableName) bqSelQuery := fmt.Sprintf("SELECT %s FROM %s ORDER BY id", colsString, qualifiedTableName) s.t.Logf("running query on bigquery: %s", bqSelQuery) diff --git a/flow/e2e/snowflake/peer_flow_sf_test.go b/flow/e2e/snowflake/peer_flow_sf_test.go index dedc9f60a0..6c9059a9a7 100644 --- a/flow/e2e/snowflake/peer_flow_sf_test.go +++ b/flow/e2e/snowflake/peer_flow_sf_test.go @@ -44,6 +44,7 @@ func (s PeerFlowE2ETestSuiteSF) Suffix() string { } func (s PeerFlowE2ETestSuiteSF) GetRows(tableName string, sfSelector string) (*model.QRecordBatch, error) { + s.t.Helper() qualifiedTableName := fmt.Sprintf(`%s.%s.%s`, s.sfHelper.testDatabaseName, s.sfHelper.testSchemaName, tableName) sfSelQuery := fmt.Sprintf(`SELECT %s FROM %s ORDER BY id`, sfSelector, qualifiedTableName) s.t.Logf("running query on snowflake: %s", sfSelQuery) @@ -1562,7 +1563,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Insert_After_Delete() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize row", tableName, dstTableName, "id,c1,c2,t") + e2e.EnvWaitForEqualTables(env, s, "normalize row", tableName, "id,c1,c2,t") _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) From 12cbe0a090f1c133bbb9c40e8b9c47ea67a748be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Sat, 6 Jan 2024 00:00:27 +0000 Subject: [PATCH 35/67] more fix --- flow/e2e/snowflake/peer_flow_sf_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/flow/e2e/snowflake/peer_flow_sf_test.go 
b/flow/e2e/snowflake/peer_flow_sf_test.go index 6c9059a9a7..a784073d2f 100644 --- a/flow/e2e/snowflake/peer_flow_sf_test.go +++ b/flow/e2e/snowflake/peer_flow_sf_test.go @@ -144,7 +144,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_Simple_Flow_SF() { require.NoError(s.t, err) connectionGen := e2e.FlowConnectionGenerationConfig{ - FlowJobName: s.attachSuffix(srcTableName), + FlowJobName: s.attachSuffix(tableName), TableNameMapping: map[string]string{srcTableName: dstTableName}, PostgresPort: e2e.PostgresPort, Destination: s.sfHelper.Peer, @@ -1199,7 +1199,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Column_Exclusion() { require.NoError(s.t, err) connectionGen := e2e.FlowConnectionGenerationConfig{ - FlowJobName: s.attachSuffix("test_exclude_flow"), + FlowJobName: s.attachSuffix(tableName), } config := &protos.FlowConnectionConfigs{ @@ -1444,7 +1444,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_UD_Same_Batch() { require.NoError(s.t, err) connectionGen := e2e.FlowConnectionGenerationConfig{ - FlowJobName: s.attachSuffix("test_softdel_ud"), + FlowJobName: s.attachSuffix(dstName), } config := &protos.FlowConnectionConfigs{ @@ -1572,7 +1572,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Insert_After_Delete() { s, "normalize delete", tableName, - dstTableName+" WHERE NOT _PEERDB_IS_DELETED", + tableName+" WHERE NOT _PEERDB_IS_DELETED", "id,c1,c2,t", ) From 53a181d459ed2af66fc05765916bb4acb060a3c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Sat, 6 Jan 2024 01:06:09 +0000 Subject: [PATCH 36/67] bq --- flow/e2e/bigquery/peer_flow_bq_test.go | 213 ++++++++++++------------- flow/e2e/test_utils.go | 30 ---- 2 files changed, 104 insertions(+), 139 deletions(-) diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go index 00e084b429..0a2c63a102 100644 --- a/flow/e2e/bigquery/peer_flow_bq_test.go +++ b/flow/e2e/bigquery/peer_flow_bq_test.go @@ -5,7 +5,6 @@ import ( "fmt" "log/slog" "strings" - "sync" 
"testing" "time" @@ -49,6 +48,14 @@ func (s PeerFlowE2ETestSuiteBQ) GetRows(tableName string, colsString string) (*m return s.bqHelper.ExecuteAndProcessQuery(bqSelQuery) } +func (s PeerFlowE2ETestSuiteBQ) GetRowsWhere(tableName string, colsString string, where string) (*model.QRecordBatch, error) { + s.t.Helper() + qualifiedTableName := fmt.Sprintf("`%s.%s`", s.bqHelper.Config.DatasetId, tableName) + bqSelQuery := fmt.Sprintf("SELECT %s FROM %s WHERE %s ORDER BY id", colsString, qualifiedTableName, where) + s.t.Logf("running query on bigquery: %s", bqSelQuery) + return s.bqHelper.ExecuteAndProcessQuery(bqSelQuery) +} + func TestPeerFlowE2ETestSuiteBQ(t *testing.T) { e2eshared.RunSuite(t, setupSuite, func(s PeerFlowE2ETestSuiteBQ) { err := e2e.TearDownPostgres(s.pool, s.bqSuffix) @@ -915,8 +922,8 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Simple_Schema_Changes_BQ() { env := e2e.NewTemporalTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(s.t, env) + tableName := "test_simple_schema_changes" srcTableName := s.attachSchemaSuffix("test_simple_schema_changes") - dstTableName := "test_simple_schema_changes" _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` CREATE TABLE IF NOT EXISTS %s ( @@ -928,7 +935,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Simple_Schema_Changes_BQ() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_simple_schema_changes"), - TableNameMapping: map[string]string{srcTableName: dstTableName}, + TableNameMapping: map[string]string{srcTableName: tableName}, PostgresPort: e2e.PostgresPort, Destination: s.bqHelper.Peer, CdcStagingPath: "", @@ -938,7 +945,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Simple_Schema_Changes_BQ() { require.NoError(s.t, err) limits := peerflow.CDCFlowLimits{ - ExitAfterRecords: 1, + ExitAfterRecords: -1, MaxBatchSize: 100, } @@ -952,9 +959,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Simple_Schema_Changes_BQ() { e2e.EnvNoError(s.t, env, err) s.t.Log("Inserted initial row in the 
source table") - // verify we got our first row. - e2e.NormalizeFlowCountQuery(env, connectionGen, 2) - e2e.EnvEqualTables(env, s, "test_simple_schema_changes", "id,c1") + e2e.EnvWaitForEqualTables(env, s, "normalize reinsert", tableName, "id,c1") // alter source table, add column c2 and insert another row. _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` @@ -967,8 +972,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Simple_Schema_Changes_BQ() { s.t.Log("Inserted row with added c2 in the source table") // verify we got our two rows, if schema did not match up it will error. - e2e.NormalizeFlowCountQuery(env, connectionGen, 4) - e2e.EnvEqualTables(env, s, "test_simple_schema_changes", "id,c1,c2") + e2e.EnvWaitForEqualTables(env, s, "normalize altered row", tableName, "id,c1,c2") // alter source table, add column c3, drop column c2 and insert another row. _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` @@ -981,8 +985,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Simple_Schema_Changes_BQ() { s.t.Log("Inserted row with added c3 in the source table") // verify we got our two rows, if schema did not match up it will error. - e2e.NormalizeFlowCountQuery(env, connectionGen, 6) - e2e.EnvEqualTables(env, s, "test_simple_schema_changes", "id,c1,c3") + e2e.EnvWaitForEqualTables(env, s, "normalize altered row", tableName, "id,c1,c3") // alter source table, drop column c3 and insert another row. _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` @@ -995,26 +998,20 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Simple_Schema_Changes_BQ() { s.t.Log("Inserted row after dropping all columns in the source table") // verify we got our two rows, if schema did not match up it will error. 
- e2e.NormalizeFlowCountQuery(env, connectionGen, 8) - e2e.EnvEqualTables(env, s, "test_simple_schema_changes", "id,c1") + e2e.EnvWaitForEqualTables(env, s, "normalize altered row", tableName, "id,c1") + + env.CancelWorkflow() }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - - // Verify workflow completes without error - require.True(s.t, env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - // allow only continue as new error - require.Contains(s.t, err.Error(), "continue as new") } func (s PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_BQ() { env := e2e.NewTemporalTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(s.t, env) + tableName := "test_simple_cpkey" srcTableName := s.attachSchemaSuffix("test_simple_cpkey") - dstTableName := "test_simple_cpkey" _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` CREATE TABLE IF NOT EXISTS %s ( @@ -1029,7 +1026,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_BQ() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_cpkey_flow"), - TableNameMapping: map[string]string{srcTableName: dstTableName}, + TableNameMapping: map[string]string{srcTableName: tableName}, PostgresPort: e2e.PostgresPort, Destination: s.bqHelper.Peer, CdcStagingPath: "", @@ -1039,7 +1036,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_BQ() { require.NoError(s.t, err) limits := peerflow.CDCFlowLimits{ - ExitAfterRecords: 10, + ExitAfterRecords: -1, MaxBatchSize: 100, } @@ -1058,26 +1055,20 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_BQ() { s.t.Log("Inserted 10 rows into the source table") // verify we got our 10 rows - e2e.NormalizeFlowCountQuery(env, connectionGen, 2) - e2e.EnvEqualTables(env, s, dstTableName, "id,c1,c2,t") + e2e.EnvWaitForEqualTables(env, s, "normalize table", tableName, "id,c1,c2,t") _, err := s.pool.Exec(context.Background(), fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) 
e2e.EnvNoError(s.t, env, err) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) e2e.EnvNoError(s.t, env, err) - }() - - env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - // Verify workflow completes without error - require.True(s.t, env.IsWorkflowCompleted()) - err = env.GetWorkflowError() + e2e.EnvWaitForEqualTables(env, s, "normalize update", tableName, "id,c1,c2,t") - // allow only continue as new error - require.Contains(s.t, err.Error(), "continue as new") + env.CancelWorkflow() + }() - e2e.RequireEqualTables(s, dstTableName, "id,c1,c2,t") + env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) } func (s PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_Toast_1_BQ() { @@ -1159,8 +1150,8 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_Toast_2_BQ() { env := e2e.NewTemporalTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(s.t, env) + tableName := "test_cpkey_toast2" srcTableName := s.attachSchemaSuffix("test_cpkey_toast2") - dstTableName := "test_cpkey_toast2" _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` CREATE TABLE IF NOT EXISTS %s ( @@ -1176,7 +1167,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_Toast_2_BQ() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_cpkey_toast2_flow"), - TableNameMapping: map[string]string{srcTableName: dstTableName}, + TableNameMapping: map[string]string{srcTableName: tableName}, PostgresPort: e2e.PostgresPort, Destination: s.bqHelper.Peer, CdcStagingPath: "", @@ -1186,7 +1177,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_Toast_2_BQ() { require.NoError(s.t, err) limits := peerflow.CDCFlowLimits{ - ExitAfterRecords: 10, + ExitAfterRecords: -1, MaxBatchSize: 100, } @@ -1205,25 +1196,18 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_Toast_2_BQ() { } s.t.Log("Inserted 10 rows into the source table") - 
e2e.NormalizeFlowCountQuery(env, connectionGen, 2) + e2e.EnvWaitForEqualTables(env, s, "normalize table", tableName, "id,c2,t,t2") _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) e2e.EnvNoError(s.t, env, err) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitForEqualTables(env, s, "normalize update", tableName, "id,c2,t,t2") + + env.CancelWorkflow() }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - - // Verify workflow completes without error - require.True(s.t, env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - // allow only continue as new error - require.Contains(s.t, err.Error(), "continue as new") - - // verify our updates and delete happened - e2e.RequireEqualTables(s, dstTableName, "id,c1,c2,t,t2") } func (s PeerFlowE2ETestSuiteBQ) Test_Columns_BQ() { @@ -1357,9 +1341,9 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Basic() { env := e2e.NewTemporalTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(s.t, env) - cmpTableName := s.attachSchemaSuffix("test_softdel") - srcTableName := fmt.Sprintf("%s_src", cmpTableName) - dstTableName := "test_softdel" + tableName := "test_softdel" + srcName := "test_softdel_src" + srcTableName := s.attachSchemaSuffix(srcName) _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` CREATE TABLE IF NOT EXISTS %s ( @@ -1372,7 +1356,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Basic() { require.NoError(s.t, err) connectionGen := e2e.FlowConnectionGenerationConfig{ - FlowJobName: s.attachSuffix("test_softdel"), + FlowJobName: s.attachSuffix(tableName), } config := &protos.FlowConnectionConfigs{ @@ -1381,7 +1365,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Basic() { TableMappings: []*protos.TableMapping{ { SourceTableIdentifier: srcTableName, - DestinationTableIdentifier: 
dstTableName, + DestinationTableIdentifier: tableName, }, }, Source: e2e.GeneratePostgresPeer(e2e.PostgresPort), @@ -1392,49 +1376,48 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Basic() { } limits := peerflow.CDCFlowLimits{ - ExitAfterRecords: 3, + ExitAfterRecords: -1, MaxBatchSize: 100, } - wg := sync.WaitGroup{} - wg.Add(1) - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. go func() { - defer wg.Done() e2e.SetupCDCFlowStatusQuery(env, connectionGen) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.NormalizeFlowCountQuery(env, connectionGen, 1) + e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize insert", srcName, tableName, "id,c1,c2,t") _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` UPDATE %s SET c1=c1+4 WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.NormalizeFlowCountQuery(env, connectionGen, 2) - // since we delete stuff, create another table to compare with - _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` - CREATE TABLE %s AS SELECT * FROM %s`, cmpTableName, srcTableName)) - e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize update", srcName, tableName, "id,c1,c2,t") _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitFor(s.t, env, time.Minute, "normalize delete", + func(ctx context.Context) bool { + pgRows, err := e2e.GetPgRows(s.pool, s.bqSuffix, srcName, "id,c1,c2,t") + if err != nil { + return false + } + rows, err := s.GetRowsWhere(tableName, "id,c1,c2,t", "NOT _PEERDB_IS_DELETED") + if err != nil { + return false + } + return e2eshared.CheckEqualRecordBatches(s.t, pgRows, rows) + }, + ) + + env.CancelWorkflow() }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, config, 
&limits, nil) - require.True(s.t, env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - require.Contains(s.t, err.Error(), "continue as new") - - wg.Wait() - - // verify our updates and delete happened - e2e.RequireEqualTables(s, "test_softdel", "id,c1,c2,t") - newerSyncedAtQuery := fmt.Sprintf(` - SELECT COUNT(*) FROM`+"`%s.%s`"+`WHERE _PEERDB_IS_DELETED`, - s.bqHelper.datasetName, dstTableName) + newerSyncedAtQuery := fmt.Sprintf( + "SELECT COUNT(*) FROM `%s.%s` WHERE _PEERDB_IS_DELETED", + s.bqHelper.datasetName, tableName) numNewRows, err := s.bqHelper.RunInt64Query(newerSyncedAtQuery) require.NoError(s.t, err) require.Equal(s.t, int64(1), numNewRows) @@ -1528,9 +1511,9 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_UD_Same_Batch() { env := e2e.NewTemporalTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(s.t, env) - cmpTableName := s.attachSchemaSuffix("test_softdel_ud") - srcTableName := fmt.Sprintf("%s_src", cmpTableName) - dstTableName := "test_softdel_ud" + srcName := "test_softdel_ud_src" + srcTableName := s.attachSchemaSuffix(srcName) + dstName := "test_softdel_ud" _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` CREATE TABLE IF NOT EXISTS %s ( @@ -1552,7 +1535,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_UD_Same_Batch() { TableMappings: []*protos.TableMapping{ { SourceTableIdentifier: srcTableName, - DestinationTableIdentifier: dstTableName, + DestinationTableIdentifier: dstName, }, }, Source: e2e.GeneratePostgresPeer(e2e.PostgresPort), @@ -1563,7 +1546,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_UD_Same_Batch() { } limits := peerflow.CDCFlowLimits{ - ExitAfterRecords: 4, + ExitAfterRecords: -1, MaxBatchSize: 100, } @@ -1575,7 +1558,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_UD_Same_Batch() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.NormalizeFlowCountQuery(env, 
connectionGen, 1) + e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize insert", srcName, dstName, "id,c1,c2,t") insertTx, err := s.pool.Begin(context.Background()) e2e.EnvNoError(s.t, env, err) @@ -1585,28 +1568,31 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_UD_Same_Batch() { _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` UPDATE %s SET c1=c1+4 WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) - // since we delete stuff, create another table to compare with - _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` - CREATE TABLE %s AS SELECT * FROM %s`, cmpTableName, srcTableName)) - e2e.EnvNoError(s.t, env, err) _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvNoError(s.t, env, insertTx.Commit(context.Background())) + + e2e.EnvWaitFor(s.t, env, time.Minute, "normalize transaction", + func(ctx context.Context) bool { + pgRows, err := e2e.GetPgRows(s.pool, s.bqSuffix, srcName, "id,c1,c2,t") + if err != nil { + return false + } + rows, err := s.GetRowsWhere(dstName, "id,c1,c2,t", "NOT _PEERDB_IS_DELETED") + if err != nil { + return false + } + return e2eshared.CheckEqualRecordBatches(s.t, pgRows, rows) + }, + ) }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, config, &limits, nil) - require.True(s.t, env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - require.Contains(s.t, err.Error(), "continue as new") - - // verify our updates and delete happened - e2e.RequireEqualTables(s, "test_softdel_ud", "id,c1,c2,t") newerSyncedAtQuery := fmt.Sprintf(` SELECT COUNT(*) FROM`+"`%s.%s`"+`WHERE _PEERDB_IS_DELETED`, - s.bqHelper.datasetName, dstTableName) + s.bqHelper.datasetName, dstName) numNewRows, err := s.bqHelper.RunInt64Query(newerSyncedAtQuery) require.NoError(s.t, err) require.Equal(s.t, int64(1), numNewRows) @@ -1616,8 +1602,8 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Insert_After_Delete() { env := 
e2e.NewTemporalTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(s.t, env) - srcTableName := s.attachSchemaSuffix("test_softdel_iad") - dstTableName := "test_softdel_iad" + tableName := "test_softdel_iad" + srcTableName := s.attachSchemaSuffix(tableName) _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` CREATE TABLE IF NOT EXISTS %s ( @@ -1630,7 +1616,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Insert_After_Delete() { require.NoError(s.t, err) connectionGen := e2e.FlowConnectionGenerationConfig{ - FlowJobName: s.attachSuffix("test_softdel_iad"), + FlowJobName: s.attachSuffix(tableName), } config := &protos.FlowConnectionConfigs{ @@ -1639,7 +1625,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Insert_After_Delete() { TableMappings: []*protos.TableMapping{ { SourceTableIdentifier: srcTableName, - DestinationTableIdentifier: dstTableName, + DestinationTableIdentifier: tableName, }, }, Source: e2e.GeneratePostgresPeer(e2e.PostgresPort), @@ -1650,7 +1636,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Insert_After_Delete() { } limits := peerflow.CDCFlowLimits{ - ExitAfterRecords: 3, + ExitAfterRecords: -1, MaxBatchSize: 100, } @@ -1662,27 +1648,36 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Insert_After_Delete() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.NormalizeFlowCountQuery(env, connectionGen, 1) + e2e.EnvWaitForEqualTables(env, s, "normalize insert", tableName, "id,c1,c2,t") _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.NormalizeFlowCountQuery(env, connectionGen, 2) + e2e.EnvWaitFor(s.t, env, time.Minute, "normalize delete", + func(ctx context.Context) bool { + pgRows, err := e2e.GetPgRows(s.pool, s.bqSuffix, tableName, "id,c1,c2,t") + if err != nil { + return false + } + rows, err := 
s.GetRowsWhere(tableName, "id,c1,c2,t", "NOT _PEERDB_IS_DELETED") + if err != nil { + return false + } + return e2eshared.CheckEqualRecordBatches(s.t, pgRows, rows) + }, + ) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(id,c1,c2,t) VALUES (1,3,4,random_string(10000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitForEqualTables(env, s, "normalize reinsert", tableName, "id,c1,c2,t") + + env.CancelWorkflow() }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, config, &limits, nil) - require.True(s.t, env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - require.Contains(s.t, err.Error(), "continue as new") - // verify our updates and delete happened - e2e.RequireEqualTables(s, "test_softdel_iad", "id,c1,c2,t") - - newerSyncedAtQuery := fmt.Sprintf(` - SELECT COUNT(*) FROM`+"`%s.%s`"+`WHERE _PEERDB_IS_DELETED`, - s.bqHelper.datasetName, dstTableName) + newerSyncedAtQuery := fmt.Sprintf( + "SELECT COUNT(*) FROM `%s.%s` WHERE _PEERDB_IS_DELETED", + s.bqHelper.datasetName, tableName) numNewRows, err := s.bqHelper.RunInt64Query(newerSyncedAtQuery) require.NoError(s.t, err) require.Equal(s.t, int64(0), numNewRows) diff --git a/flow/e2e/test_utils.go b/flow/e2e/test_utils.go index 3c4f7b476e..150e9e0946 100644 --- a/flow/e2e/test_utils.go +++ b/flow/e2e/test_utils.go @@ -205,36 +205,6 @@ func SetupCDCFlowStatusQuery(env *testsuite.TestWorkflowEnvironment, } } -func NormalizeFlowCountQuery(env *testsuite.TestWorkflowEnvironment, - connectionGen FlowConnectionGenerationConfig, - minCount int, -) { - // wait for PeerFlowStatusQuery to finish setup - // sleep for 5 second to allow the workflow to start - time.Sleep(5 * time.Second) - for { - response, err := env.QueryWorkflow( - shared.CDCFlowStateQuery, - connectionGen.FlowJobName, - ) - if err == nil { - var state peerflow.CDCFlowWorkflowState - err = response.Get(&state) - if err != nil { - slog.Error(err.Error()) - } - - if len(state.NormalizeFlowStatuses) >= minCount { - 
break - } - } else { - // log the error for informational purposes - slog.Error(err.Error()) - } - time.Sleep(1 * time.Second) - } -} - func CreateTableForQRep(pool *pgxpool.Pool, suffix string, tableName string) error { tblFields := []string{ "id UUID NOT NULL PRIMARY KEY", From 6809ab3cb856f14128065b79168c7063043f743f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Sat, 6 Jan 2024 01:07:48 +0000 Subject: [PATCH 37/67] fix sf exclusion --- flow/e2e/snowflake/peer_flow_sf_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flow/e2e/snowflake/peer_flow_sf_test.go b/flow/e2e/snowflake/peer_flow_sf_test.go index a784073d2f..9e660a6a13 100644 --- a/flow/e2e/snowflake/peer_flow_sf_test.go +++ b/flow/e2e/snowflake/peer_flow_sf_test.go @@ -1250,7 +1250,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Column_Exclusion() { env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, config, &limits, nil) - sfRows, err := s.GetRows("*", "test_exclude_sf") + sfRows, err := s.GetRows(tableName, "*") require.NoError(s.t, err) for _, field := range sfRows.Schema.Fields { From d88f76d0915bd9511b73ae04fbd2fb0f630bf3f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Sat, 6 Jan 2024 01:30:57 +0000 Subject: [PATCH 38/67] more helper, missed a CancelWorkflow --- flow/e2e/bigquery/peer_flow_bq_test.go | 2 ++ flow/e2e/test_utils.go | 2 ++ 2 files changed, 4 insertions(+) diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go index 0a2c63a102..bc89806b44 100644 --- a/flow/e2e/bigquery/peer_flow_bq_test.go +++ b/flow/e2e/bigquery/peer_flow_bq_test.go @@ -1586,6 +1586,8 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_UD_Same_Batch() { return e2eshared.CheckEqualRecordBatches(s.t, pgRows, rows) }, ) + + env.CancelWorkflow() }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, config, &limits, nil) diff --git a/flow/e2e/test_utils.go b/flow/e2e/test_utils.go index 150e9e0946..339680b1f5 100644 
--- a/flow/e2e/test_utils.go +++ b/flow/e2e/test_utils.go @@ -160,6 +160,8 @@ func EnvWaitForEqualTablesWithNames( t.Helper() EnvWaitFor(t, env, time.Minute, reason, func(ctx context.Context) bool { + t.Helper() + suffix := suite.Suffix() pool := suite.Pool() pgRows, err := GetPgRows(pool, suffix, srcTable, cols) From 8eae927d89125ad5999b447f1bbdd6cd75315937 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Sat, 6 Jan 2024 01:57:29 +0000 Subject: [PATCH 39/67] improve error messages causing test failures --- flow/e2e/bigquery/peer_flow_bq_test.go | 4 ++-- flow/e2e/test_utils.go | 26 +++++--------------------- 2 files changed, 7 insertions(+), 23 deletions(-) diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go index bc89806b44..899818bc95 100644 --- a/flow/e2e/bigquery/peer_flow_bq_test.go +++ b/flow/e2e/bigquery/peer_flow_bq_test.go @@ -1592,8 +1592,8 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_UD_Same_Batch() { env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, config, &limits, nil) - newerSyncedAtQuery := fmt.Sprintf(` - SELECT COUNT(*) FROM`+"`%s.%s`"+`WHERE _PEERDB_IS_DELETED`, + newerSyncedAtQuery := fmt.Sprintf( + "SELECT COUNT(*) FROM `%s.%s` WHERE _PEERDB_IS_DELETED", s.bqHelper.datasetName, dstName) numNewRows, err := s.bqHelper.RunInt64Query(newerSyncedAtQuery) require.NoError(s.t, err) diff --git a/flow/e2e/test_utils.go b/flow/e2e/test_utils.go index 339680b1f5..52ad31419a 100644 --- a/flow/e2e/test_utils.go +++ b/flow/e2e/test_utils.go @@ -70,18 +70,7 @@ func EnvNoError(t *testing.T, env *testsuite.TestWorkflowEnvironment, err error) t.Helper() if err != nil { - t.Error(err.Error()) - env.CancelWorkflow() - runtime.Goexit() - } -} - -// See EnvNoError -func EnvEqual[T comparable](t *testing.T, env *testsuite.TestWorkflowEnvironment, x T, y T) { - t.Helper() - - if x != y { - t.Error("not equal", x, y) + t.Error("UNEXPECTED ERROR", err.Error()) env.CancelWorkflow() runtime.Goexit() 
} @@ -91,7 +80,7 @@ func EnvTrue(t *testing.T, env *testsuite.TestWorkflowEnvironment, val bool) { t.Helper() if !val { - t.Error("assertion failed") + t.Error("UNEXPECTED FALSE") env.CancelWorkflow() runtime.Goexit() } @@ -539,14 +528,9 @@ func RequireEqualRecordBatches(t *testing.T, q *model.QRecordBatch, other *model require.True(t, e2eshared.CheckEqualRecordBatches(t, q, other)) } -// See EnvNoError func EnvEqualRecordBatches(t *testing.T, env *testsuite.TestWorkflowEnvironment, q *model.QRecordBatch, other *model.QRecordBatch) { t.Helper() - - if !e2eshared.CheckEqualRecordBatches(t, q, other) { - env.CancelWorkflow() - runtime.Goexit() - } + EnvTrue(t, env, e2eshared.CheckEqualRecordBatches(t, q, other)) } func EnvWaitFor(t *testing.T, env *testsuite.TestWorkflowEnvironment, timeout time.Duration, reason string, f func(ctx context.Context) bool) { @@ -555,10 +539,10 @@ func EnvWaitFor(t *testing.T, env *testsuite.TestWorkflowEnvironment, timeout ti ctx, cleanup := context.WithTimeout(context.Background(), timeout) defer cleanup() deadline, _ := ctx.Deadline() + t.Log("WaitFor", reason) for !f(ctx) { - t.Log(time.Now(), deadline) if time.Now().Compare(deadline) >= 0 { - t.Error("WaitFor timed out", reason) + t.Error("UNEXPECTED TIMEOUT", reason) env.CancelWorkflow() runtime.Goexit() } From d7c329a502f09a89615f1a5473943212223db1bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Sat, 6 Jan 2024 02:40:33 +0000 Subject: [PATCH 40/67] fixes --- flow/e2e/bigquery/peer_flow_bq_test.go | 17 +++++++++-------- flow/e2e/snowflake/peer_flow_sf_test.go | 5 ++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go index 899818bc95..5a67edf1fb 100644 --- a/flow/e2e/bigquery/peer_flow_bq_test.go +++ b/flow/e2e/bigquery/peer_flow_bq_test.go @@ -1583,7 +1583,15 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_UD_Same_Batch() { if err != nil { return false } - 
return e2eshared.CheckEqualRecordBatches(s.t, pgRows, rows) + if !e2eshared.CheckEqualRecordBatches(s.t, pgRows, rows) { + return false + } + + newerSyncedAtQuery := fmt.Sprintf( + "SELECT COUNT(*) FROM `%s.%s` WHERE _PEERDB_IS_DELETED", + s.bqHelper.datasetName, dstName) + numNewRows, err := s.bqHelper.RunInt64Query(newerSyncedAtQuery) + return err != nil && numNewRows == 1 }, ) @@ -1591,13 +1599,6 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_UD_Same_Batch() { }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, config, &limits, nil) - - newerSyncedAtQuery := fmt.Sprintf( - "SELECT COUNT(*) FROM `%s.%s` WHERE _PEERDB_IS_DELETED", - s.bqHelper.datasetName, dstName) - numNewRows, err := s.bqHelper.RunInt64Query(newerSyncedAtQuery) - require.NoError(s.t, err) - require.Equal(s.t, int64(1), numNewRows) } func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Insert_After_Delete() { diff --git a/flow/e2e/snowflake/peer_flow_sf_test.go b/flow/e2e/snowflake/peer_flow_sf_test.go index 9e660a6a13..3f3e48a7f5 100644 --- a/flow/e2e/snowflake/peer_flow_sf_test.go +++ b/flow/e2e/snowflake/peer_flow_sf_test.go @@ -1239,9 +1239,9 @@ func (s PeerFlowE2ETestSuiteSF) Test_Column_Exclusion() { e2e.EnvWaitForEqualTables(env, s, "normalize table", tableName, "id,t,t2") _, err = s.pool.Exec(context.Background(), - fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) + fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) - _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) + _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=0`, srcTableName)) e2e.EnvNoError(s.t, env, err) e2e.EnvWaitForEqualTables(env, s, "normalize update/delete", tableName, "id,t,t2") @@ -1257,7 +1257,6 @@ func (s PeerFlowE2ETestSuiteSF) Test_Column_Exclusion() { require.NotEqual(s.t, field.Name, "c2") } require.Equal(s.t, 5, 
len(sfRows.Schema.Fields)) - require.Equal(s.t, 10, len(sfRows.Records)) } func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Basic() { From a9ced641947dbbd40860326a56324d1780f55b6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Sat, 6 Jan 2024 03:06:06 +0000 Subject: [PATCH 41/67] column exclusion: include checking c1. debug logging this last test --- flow/e2e/bigquery/peer_flow_bq_test.go | 4 +++- flow/e2e/snowflake/peer_flow_sf_test.go | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go index 5a67edf1fb..d8e22c06aa 100644 --- a/flow/e2e/bigquery/peer_flow_bq_test.go +++ b/flow/e2e/bigquery/peer_flow_bq_test.go @@ -1526,7 +1526,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_UD_Same_Batch() { require.NoError(s.t, err) connectionGen := e2e.FlowConnectionGenerationConfig{ - FlowJobName: s.attachSuffix("test_softdel_ud"), + FlowJobName: s.attachSuffix(dstName), } config := &protos.FlowConnectionConfigs{ @@ -1587,6 +1587,8 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_UD_Same_Batch() { return false } + newrows, _ := s.GetRows(dstName, "id,c1,c2,t") + s.t.Log("rows same", rows.NumRecords, newrows.NumRecords) newerSyncedAtQuery := fmt.Sprintf( "SELECT COUNT(*) FROM `%s.%s` WHERE _PEERDB_IS_DELETED", s.bqHelper.datasetName, dstName) diff --git a/flow/e2e/snowflake/peer_flow_sf_test.go b/flow/e2e/snowflake/peer_flow_sf_test.go index 3f3e48a7f5..0319154dc7 100644 --- a/flow/e2e/snowflake/peer_flow_sf_test.go +++ b/flow/e2e/snowflake/peer_flow_sf_test.go @@ -1237,13 +1237,13 @@ func (s PeerFlowE2ETestSuiteSF) Test_Column_Exclusion() { } s.t.Log("Inserted 10 rows into the source table") - e2e.EnvWaitForEqualTables(env, s, "normalize table", tableName, "id,t,t2") + e2e.EnvWaitForEqualTables(env, s, "normalize table", tableName, "id,c1,t,t2") _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=1`, 
srcTableName)) e2e.EnvNoError(s.t, env, err) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=0`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTables(env, s, "normalize update/delete", tableName, "id,t,t2") + e2e.EnvWaitForEqualTables(env, s, "normalize update/delete", tableName, "id,c1,t,t2") env.CancelWorkflow() }() From a64df488b15970662569162f2e19066173b973c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Sat, 6 Jan 2024 03:26:52 +0000 Subject: [PATCH 42/67] remove datasetName from bqhelper --- flow/e2e/bigquery/bigquery_helper.go | 25 +++++++++++-------------- flow/e2e/bigquery/peer_flow_bq_test.go | 18 +++++++++--------- flow/e2e/snowflake/snowflake_helper.go | 12 ++++++------ 3 files changed, 26 insertions(+), 29 deletions(-) diff --git a/flow/e2e/bigquery/bigquery_helper.go b/flow/e2e/bigquery/bigquery_helper.go index c1487e01f6..ec2ef1e4d6 100644 --- a/flow/e2e/bigquery/bigquery_helper.go +++ b/flow/e2e/bigquery/bigquery_helper.go @@ -29,8 +29,6 @@ type BigQueryTestHelper struct { Peer *protos.Peer // client to talk to BigQuery client *bigquery.Client - // dataset to use for testing. - datasetName string } // NewBigQueryTestHelper creates a new BigQueryTestHelper. @@ -51,7 +49,7 @@ func NewBigQueryTestHelper() (*BigQueryTestHelper, error) { return nil, fmt.Errorf("failed to read file: %w", err) } - var config protos.BigqueryConfig + var config *protos.BigqueryConfig err = json.Unmarshal(content, &config) if err != nil { return nil, fmt.Errorf("failed to unmarshal json: %w", err) @@ -60,7 +58,7 @@ func NewBigQueryTestHelper() (*BigQueryTestHelper, error) { // suffix the dataset with the runID to namespace stateful schemas. 
config.DatasetId = fmt.Sprintf("%s_%d", config.DatasetId, runID) - bqsa, err := peer_bq.NewBigQueryServiceAccount(&config) + bqsa, err := peer_bq.NewBigQueryServiceAccount(config) if err != nil { return nil, fmt.Errorf("failed to create BigQueryServiceAccount: %v", err) } @@ -70,14 +68,13 @@ func NewBigQueryTestHelper() (*BigQueryTestHelper, error) { return nil, fmt.Errorf("failed to create helper BigQuery client: %v", err) } - peer := generateBQPeer(&config) + peer := generateBQPeer(config) return &BigQueryTestHelper{ - runID: runID, - Config: &config, - client: client, - datasetName: config.DatasetId, - Peer: peer, + runID: runID, + Config: config, + client: client, + Peer: peer, }, nil } @@ -115,12 +112,12 @@ func (b *BigQueryTestHelper) datasetExists(datasetName string) (bool, error) { // RecreateDataset recreates the dataset, i.e, deletes it if exists and creates it again. func (b *BigQueryTestHelper) RecreateDataset() error { - exists, err := b.datasetExists(b.datasetName) + exists, err := b.datasetExists(b.Config.DatasetId) if err != nil { return fmt.Errorf("failed to check if dataset %s exists: %w", b.Config.DatasetId, err) } - dataset := b.client.Dataset(b.datasetName) + dataset := b.client.Dataset(b.Config.DatasetId) if exists { err := dataset.DeleteWithContents(context.Background()) if err != nil { @@ -168,7 +165,7 @@ func (b *BigQueryTestHelper) RunCommand(command string) error { // countRows(tableName) returns the number of rows in the given table. 
func (b *BigQueryTestHelper) countRows(tableName string) (int, error) { - return b.countRowsWithDataset(b.datasetName, tableName, "") + return b.countRowsWithDataset(b.Config.DatasetId, tableName, "") } func (b *BigQueryTestHelper) countRowsWithDataset(dataset, tableName string, nonNullCol string) (int, error) { @@ -445,7 +442,7 @@ func (b *BigQueryTestHelper) CreateTable(tableName string, schema *model.QRecord fields = append(fields, fmt.Sprintf("`%s` %s", field.Name, bqType)) } - command := fmt.Sprintf("CREATE TABLE %s.%s (%s)", b.datasetName, tableName, strings.Join(fields, ", ")) + command := fmt.Sprintf("CREATE TABLE %s.%s (%s)", b.Config.DatasetId, tableName, strings.Join(fields, ", ")) err := b.RunCommand(command) if err != nil { diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go index d8e22c06aa..81d9176cd0 100644 --- a/flow/e2e/bigquery/peer_flow_bq_test.go +++ b/flow/e2e/bigquery/peer_flow_bq_test.go @@ -64,7 +64,7 @@ func TestPeerFlowE2ETestSuiteBQ(t *testing.T) { s.t.FailNow() } - err = s.bqHelper.DropDataset(s.bqHelper.datasetName) + err = s.bqHelper.DropDataset(s.bqHelper.Config.DatasetId) if err != nil { slog.Error("failed to tear down bigquery", slog.Any("error", err)) s.t.FailNow() @@ -75,7 +75,7 @@ func TestPeerFlowE2ETestSuiteBQ(t *testing.T) { func (s PeerFlowE2ETestSuiteBQ) checkJSONValue(tableName, colName, fieldName, value string) error { res, err := s.bqHelper.ExecuteAndProcessQuery(fmt.Sprintf( "SELECT `%s`.%s FROM `%s.%s`;", - colName, fieldName, s.bqHelper.datasetName, tableName)) + colName, fieldName, s.bqHelper.Config.DatasetId, tableName)) if err != nil { return fmt.Errorf("json value check failed: %v", err) } @@ -848,10 +848,10 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Invalid_Geo_BQ_Avro_CDC() { // We inserted 4 invalid shapes in each. 
// They should have been filtered out as null on destination - lineCount, err := s.bqHelper.countRowsWithDataset(s.bqHelper.datasetName, dstTableName, "line") + lineCount, err := s.bqHelper.countRowsWithDataset(s.bqHelper.Config.DatasetId, dstTableName, "line") require.NoError(s.t, err) - polyCount, err := s.bqHelper.countRowsWithDataset(s.bqHelper.datasetName, dstTableName, "`polyPoly`") + polyCount, err := s.bqHelper.countRowsWithDataset(s.bqHelper.Config.DatasetId, dstTableName, "`polyPoly`") require.NoError(s.t, err) require.Equal(s.t, 6, lineCount) @@ -1277,7 +1277,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Multi_Table_Multi_Dataset_BQ() { srcTable1Name := s.attachSchemaSuffix("test1_bq") dstTable1Name := "test1_bq" - secondDataset := fmt.Sprintf("%s_2", s.bqHelper.datasetName) + secondDataset := fmt.Sprintf("%s_2", s.bqHelper.Config.DatasetId) srcTable2Name := s.attachSchemaSuffix("test2_bq") dstTable2Name := "test2_bq" @@ -1417,7 +1417,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Basic() { newerSyncedAtQuery := fmt.Sprintf( "SELECT COUNT(*) FROM `%s.%s` WHERE _PEERDB_IS_DELETED", - s.bqHelper.datasetName, tableName) + s.bqHelper.Config.DatasetId, tableName) numNewRows, err := s.bqHelper.RunInt64Query(newerSyncedAtQuery) require.NoError(s.t, err) require.Equal(s.t, int64(1), numNewRows) @@ -1501,7 +1501,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_IUD_Same_Batch() { newerSyncedAtQuery := fmt.Sprintf(` SELECT COUNT(*) FROM`+"`%s.%s`"+`WHERE _PEERDB_IS_DELETED`, - s.bqHelper.datasetName, dstTableName) + s.bqHelper.Config.DatasetId, dstTableName) numNewRows, err := s.bqHelper.RunInt64Query(newerSyncedAtQuery) require.NoError(s.t, err) require.Equal(s.t, int64(1), numNewRows) @@ -1591,7 +1591,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_UD_Same_Batch() { s.t.Log("rows same", rows.NumRecords, newrows.NumRecords) newerSyncedAtQuery := fmt.Sprintf( "SELECT COUNT(*) FROM `%s.%s` WHERE _PEERDB_IS_DELETED", - s.bqHelper.datasetName, dstName) + 
s.bqHelper.Config.DatasetId, dstName) numNewRows, err := s.bqHelper.RunInt64Query(newerSyncedAtQuery) return err != nil && numNewRows == 1 }, @@ -1682,7 +1682,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Insert_After_Delete() { newerSyncedAtQuery := fmt.Sprintf( "SELECT COUNT(*) FROM `%s.%s` WHERE _PEERDB_IS_DELETED", - s.bqHelper.datasetName, tableName) + s.bqHelper.Config.DatasetId, tableName) numNewRows, err := s.bqHelper.RunInt64Query(newerSyncedAtQuery) require.NoError(s.t, err) require.Equal(s.t, int64(0), numNewRows) diff --git a/flow/e2e/snowflake/snowflake_helper.go b/flow/e2e/snowflake/snowflake_helper.go index 8dd9bfa60a..c0ba1be752 100644 --- a/flow/e2e/snowflake/snowflake_helper.go +++ b/flow/e2e/snowflake/snowflake_helper.go @@ -42,13 +42,13 @@ func NewSnowflakeTestHelper() (*SnowflakeTestHelper, error) { return nil, fmt.Errorf("failed to read file: %w", err) } - var config protos.SnowflakeConfig - err = json.Unmarshal(content, &config) + var config *protos.SnowflakeConfig + err = json.Unmarshal(content, config) if err != nil { return nil, fmt.Errorf("failed to unmarshal json: %w", err) } - peer := generateSFPeer(&config) + peer := generateSFPeer(config) runID, err := shared.RandomUInt64() if err != nil { return nil, fmt.Errorf("failed to generate random uint64: %w", err) @@ -56,7 +56,7 @@ func NewSnowflakeTestHelper() (*SnowflakeTestHelper, error) { testDatabaseName := fmt.Sprintf("e2e_test_%d", runID) - adminClient, err := connsnowflake.NewSnowflakeClient(context.Background(), &config) + adminClient, err := connsnowflake.NewSnowflakeClient(context.Background(), config) if err != nil { return nil, fmt.Errorf("failed to create Snowflake client: %w", err) } @@ -66,13 +66,13 @@ func NewSnowflakeTestHelper() (*SnowflakeTestHelper, error) { } config.Database = testDatabaseName - testClient, err := connsnowflake.NewSnowflakeClient(context.Background(), &config) + testClient, err := connsnowflake.NewSnowflakeClient(context.Background(), config) if 
err != nil { return nil, fmt.Errorf("failed to create Snowflake client: %w", err) } return &SnowflakeTestHelper{ - Config: &config, + Config: config, Peer: peer, adminClient: adminClient, testClient: testClient, From 3b28effa3c8bfa0d4cd4f9847cea838170562955 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Sat, 6 Jan 2024 03:31:01 +0000 Subject: [PATCH 43/67] more debug --- flow/e2e/bigquery/peer_flow_bq_test.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go index 81d9176cd0..a3558e6f45 100644 --- a/flow/e2e/bigquery/peer_flow_bq_test.go +++ b/flow/e2e/bigquery/peer_flow_bq_test.go @@ -1593,6 +1593,10 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_UD_Same_Batch() { "SELECT COUNT(*) FROM `%s.%s` WHERE _PEERDB_IS_DELETED", s.bqHelper.Config.DatasetId, dstName) numNewRows, err := s.bqHelper.RunInt64Query(newerSyncedAtQuery) + s.t.Log("count", numNewRows) + if err != nil { + s.t.Error("ERROR ERROR ERROR", err) + } return err != nil && numNewRows == 1 }, ) From 78970090ca285e56da4c4eec89e7b5fd4999b139 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Sat, 6 Jan 2024 03:37:08 +0000 Subject: [PATCH 44/67] fix --- flow/e2e/snowflake/snowflake_helper.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flow/e2e/snowflake/snowflake_helper.go b/flow/e2e/snowflake/snowflake_helper.go index c0ba1be752..88ce61e60d 100644 --- a/flow/e2e/snowflake/snowflake_helper.go +++ b/flow/e2e/snowflake/snowflake_helper.go @@ -43,7 +43,7 @@ func NewSnowflakeTestHelper() (*SnowflakeTestHelper, error) { } var config *protos.SnowflakeConfig - err = json.Unmarshal(content, config) + err = json.Unmarshal(content, &config) if err != nil { return nil, fmt.Errorf("failed to unmarshal json: %w", err) } From e43588719fdd635378f6968327144d651eeb9133 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Sat, 6 Jan 2024 03:40:48 +0000 Subject: [PATCH 
45/67] more logging --- flow/e2e/bigquery/peer_flow_bq_test.go | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go index a3558e6f45..3eb07c76ec 100644 --- a/flow/e2e/bigquery/peer_flow_bq_test.go +++ b/flow/e2e/bigquery/peer_flow_bq_test.go @@ -1593,10 +1593,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_UD_Same_Batch() { "SELECT COUNT(*) FROM `%s.%s` WHERE _PEERDB_IS_DELETED", s.bqHelper.Config.DatasetId, dstName) numNewRows, err := s.bqHelper.RunInt64Query(newerSyncedAtQuery) - s.t.Log("count", numNewRows) - if err != nil { - s.t.Error("ERROR ERROR ERROR", err) - } + s.t.Log("countcount", numNewRows, err != nil, numNewRows == 1) return err != nil && numNewRows == 1 }, ) From 1ab2f23b2b69528cd81c0efacf73de6a64d7f532 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Sat, 6 Jan 2024 03:50:00 +0000 Subject: [PATCH 46/67] err != nil --- flow/e2e/bigquery/peer_flow_bq_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go index 3eb07c76ec..c971d8097b 100644 --- a/flow/e2e/bigquery/peer_flow_bq_test.go +++ b/flow/e2e/bigquery/peer_flow_bq_test.go @@ -1593,7 +1593,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_UD_Same_Batch() { "SELECT COUNT(*) FROM `%s.%s` WHERE _PEERDB_IS_DELETED", s.bqHelper.Config.DatasetId, dstName) numNewRows, err := s.bqHelper.RunInt64Query(newerSyncedAtQuery) - s.t.Log("countcount", numNewRows, err != nil, numNewRows == 1) + s.t.Log("countcount", numNewRows, err, err != nil, numNewRows == 1) return err != nil && numNewRows == 1 }, ) From cafacfd72024af9539763b2f3bbda878d8176297 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Sat, 6 Jan 2024 03:50:36 +0000 Subject: [PATCH 47/67] I've managed to once again burn an hour of my time over != vs == --- flow/e2e/bigquery/peer_flow_bq_test.go | 3 +-- 1 file 
changed, 1 insertion(+), 2 deletions(-) diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go index c971d8097b..0a7fcdc9a8 100644 --- a/flow/e2e/bigquery/peer_flow_bq_test.go +++ b/flow/e2e/bigquery/peer_flow_bq_test.go @@ -1593,8 +1593,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_UD_Same_Batch() { "SELECT COUNT(*) FROM `%s.%s` WHERE _PEERDB_IS_DELETED", s.bqHelper.Config.DatasetId, dstName) numNewRows, err := s.bqHelper.RunInt64Query(newerSyncedAtQuery) - s.t.Log("countcount", numNewRows, err, err != nil, numNewRows == 1) - return err != nil && numNewRows == 1 + return err == nil && numNewRows == 1 }, ) From 4af0d018043f3304ef7b7a1b031b83493e9c62b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Sat, 6 Jan 2024 04:03:40 +0000 Subject: [PATCH 48/67] remove leftover logging --- flow/e2e/bigquery/peer_flow_bq_test.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go index 0a7fcdc9a8..bfbec22431 100644 --- a/flow/e2e/bigquery/peer_flow_bq_test.go +++ b/flow/e2e/bigquery/peer_flow_bq_test.go @@ -1587,8 +1587,6 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_UD_Same_Batch() { return false } - newrows, _ := s.GetRows(dstName, "id,c1,c2,t") - s.t.Log("rows same", rows.NumRecords, newrows.NumRecords) newerSyncedAtQuery := fmt.Sprintf( "SELECT COUNT(*) FROM `%s.%s` WHERE _PEERDB_IS_DELETED", s.bqHelper.Config.DatasetId, dstName) From aea6c652bc034ed15c42de1c9ee20b3a6afc35d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Sat, 6 Jan 2024 14:18:29 +0000 Subject: [PATCH 49/67] reduce noise from waitfor --- flow/e2e/bigquery/peer_flow_bq_test.go | 14 +++++++------- flow/e2eshared/e2eshared.go | 2 -- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go index bfbec22431..c18c14d518 100644 --- 
a/flow/e2e/bigquery/peer_flow_bq_test.go +++ b/flow/e2e/bigquery/peer_flow_bq_test.go @@ -923,7 +923,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Simple_Schema_Changes_BQ() { e2e.RegisterWorkflowsAndActivities(s.t, env) tableName := "test_simple_schema_changes" - srcTableName := s.attachSchemaSuffix("test_simple_schema_changes") + srcTableName := s.attachSchemaSuffix(tableName) _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` CREATE TABLE IF NOT EXISTS %s ( @@ -934,7 +934,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Simple_Schema_Changes_BQ() { require.NoError(s.t, err) connectionGen := e2e.FlowConnectionGenerationConfig{ - FlowJobName: s.attachSuffix("test_simple_schema_changes"), + FlowJobName: s.attachSuffix(tableName), TableNameMapping: map[string]string{srcTableName: tableName}, PostgresPort: e2e.PostgresPort, Destination: s.bqHelper.Peer, @@ -955,11 +955,11 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Simple_Schema_Changes_BQ() { // insert first row. e2e.SetupCDCFlowStatusQuery(env, connectionGen) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` - INSERT INTO %s(c1) VALUES ($1)`, srcTableName), 1) + INSERT INTO %s(c1) VALUES (1)`, srcTableName)) e2e.EnvNoError(s.t, env, err) s.t.Log("Inserted initial row in the source table") - e2e.EnvWaitForEqualTables(env, s, "normalize reinsert", tableName, "id,c1") + e2e.EnvWaitForEqualTables(env, s, "normalize insert", tableName, "id,c1") // alter source table, add column c2 and insert another row. 
_, err = s.pool.Exec(context.Background(), fmt.Sprintf(` @@ -967,7 +967,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Simple_Schema_Changes_BQ() { e2e.EnvNoError(s.t, env, err) s.t.Log("Altered source table, added column c2") _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` - INSERT INTO %s(c1,c2) VALUES ($1,$2)`, srcTableName), 2, 2) + INSERT INTO %s(c1,c2) VALUES (2,2)`, srcTableName)) e2e.EnvNoError(s.t, env, err) s.t.Log("Inserted row with added c2 in the source table") @@ -980,7 +980,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Simple_Schema_Changes_BQ() { e2e.EnvNoError(s.t, env, err) s.t.Log("Altered source table, dropped column c2 and added column c3") _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` - INSERT INTO %s(c1,c3) VALUES ($1,$2)`, srcTableName), 3, 3) + INSERT INTO %s(c1,c3) VALUES (3,3)`, srcTableName)) e2e.EnvNoError(s.t, env, err) s.t.Log("Inserted row with added c3 in the source table") @@ -993,7 +993,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Simple_Schema_Changes_BQ() { e2e.EnvNoError(s.t, env, err) s.t.Log("Altered source table, dropped column c3") _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` - INSERT INTO %s(c1) VALUES ($1)`, srcTableName), 4) + INSERT INTO %s(c1) VALUES (4)`, srcTableName)) e2e.EnvNoError(s.t, env, err) s.t.Log("Inserted row after dropping all columns in the source table") diff --git a/flow/e2eshared/e2eshared.go b/flow/e2eshared/e2eshared.go index c9a5eca325..84ab661e1b 100644 --- a/flow/e2eshared/e2eshared.go +++ b/flow/e2eshared/e2eshared.go @@ -111,8 +111,6 @@ func CheckEqualRecordBatches(t *testing.T, q *model.QRecordBatch, other *model.Q for i, record := range q.Records { if !CheckQRecordEquality(t, record, other.Records[i]) { t.Logf("Record %d is not equal", i) - t.Logf("Record 1: %v", record) - t.Logf("Record 2: %v", other.Records[i]) return false } } From a20f17812fe3d8f47cc9631e003a77f9848c3661 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Sat, 6 Jan 2024 14:34:48 +0000 
Subject: [PATCH 50/67] 2 minute timeout --- flow/e2e/test_utils.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flow/e2e/test_utils.go b/flow/e2e/test_utils.go index 52ad31419a..d9bb05a222 100644 --- a/flow/e2e/test_utils.go +++ b/flow/e2e/test_utils.go @@ -38,8 +38,8 @@ func RegisterWorkflowsAndActivities(t *testing.T, env *testsuite.TestWorkflowEnv t.Fatalf("unable to create catalog connection pool: %v", err) } - // set a 300 second timeout for the workflow to execute a few runs. - env.SetTestTimeout(300 * time.Second) + // set a 5 minute timeout for the workflow to execute a few runs. + env.SetTestTimeout(5 * time.Minute) env.RegisterWorkflow(peerflow.CDCFlowWorkflowWithConfig) env.RegisterWorkflow(peerflow.SyncFlowWorkflow) @@ -148,7 +148,7 @@ func EnvWaitForEqualTablesWithNames( t := suite.T() t.Helper() - EnvWaitFor(t, env, time.Minute, reason, func(ctx context.Context) bool { + EnvWaitFor(t, env, 2*time.Minute, reason, func(ctx context.Context) bool { t.Helper() suffix := suite.Suffix() From d9ca7634041ba639fe78a23e431b1b0075c0bbe3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Sat, 6 Jan 2024 14:43:26 +0000 Subject: [PATCH 51/67] skip 5 second sleep at start of SetupCDCFlowStatusQuery --- flow/e2e/bigquery/peer_flow_bq_test.go | 2 +- flow/e2e/test_utils.go | 16 +++++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go index c18c14d518..9a6e83a5a2 100644 --- a/flow/e2e/bigquery/peer_flow_bq_test.go +++ b/flow/e2e/bigquery/peer_flow_bq_test.go @@ -998,7 +998,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Simple_Schema_Changes_BQ() { s.t.Log("Inserted row after dropping all columns in the source table") // verify we got our two rows, if schema did not match up it will error. 
- e2e.EnvWaitForEqualTables(env, s, "normalize altered row", tableName, "id,c1") + e2e.EnvWaitForEqualTables(env, s, "normalize drop column", tableName, "id,c1") env.CancelWorkflow() }() diff --git a/flow/e2e/test_utils.go b/flow/e2e/test_utils.go index d9bb05a222..62636dd526 100644 --- a/flow/e2e/test_utils.go +++ b/flow/e2e/test_utils.go @@ -169,11 +169,12 @@ func EnvWaitForEqualTablesWithNames( func SetupCDCFlowStatusQuery(env *testsuite.TestWorkflowEnvironment, connectionGen FlowConnectionGenerationConfig, -) { - // wait for PeerFlowStatusQuery to finish setup - // sleep for 5 second to allow the workflow to start - time.Sleep(5 * time.Second) +) error { + // errors expected while PeerFlowStatusQuery is setup + counter := 0 for { + time.Sleep(time.Second) + counter++ response, err := env.QueryWorkflow( shared.CDCFlowStateQuery, connectionGen.FlowJobName, @@ -186,13 +187,14 @@ func SetupCDCFlowStatusQuery(env *testsuite.TestWorkflowEnvironment, } if *state.CurrentFlowState == protos.FlowStatus_STATUS_RUNNING { - break + return nil } - } else { + } else if counter > 15 { + return err + } else if counter > 5 { // log the error for informational purposes slog.Error(err.Error()) } - time.Sleep(1 * time.Second) } } From 225016534f81f7601614fc46c12718ca12922ae1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Sat, 6 Jan 2024 14:47:36 +0000 Subject: [PATCH 52/67] log count --- flow/e2e/bigquery/peer_flow_bq_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go index 9a6e83a5a2..71230b05f8 100644 --- a/flow/e2e/bigquery/peer_flow_bq_test.go +++ b/flow/e2e/bigquery/peer_flow_bq_test.go @@ -1576,9 +1576,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_UD_Same_Batch() { e2e.EnvWaitFor(s.t, env, time.Minute, "normalize transaction", func(ctx context.Context) bool { pgRows, err := e2e.GetPgRows(s.pool, s.bqSuffix, srcName, "id,c1,c2,t") - if err 
!= nil { - return false - } + e2e.EnvNoError(s.t, env, err) rows, err := s.GetRowsWhere(dstName, "id,c1,c2,t", "NOT _PEERDB_IS_DELETED") if err != nil { return false @@ -1591,6 +1589,8 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_UD_Same_Batch() { "SELECT COUNT(*) FROM `%s.%s` WHERE _PEERDB_IS_DELETED", s.bqHelper.Config.DatasetId, dstName) numNewRows, err := s.bqHelper.RunInt64Query(newerSyncedAtQuery) + e2e.EnvNoError(s.t, env, err) + s.t.Log("waiting on _PEERDB_IS_DELETED to be 1, currently", numNewRows) return err == nil && numNewRows == 1 }, ) From a77938b23f27491114c335ccd1cba663979451d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Sat, 6 Jan 2024 15:09:43 +0000 Subject: [PATCH 53/67] cleanup code, move cdc timeout into code, raise to 30, fix teardown --- .github/workflows/flow.yml | 1 - flow/e2e/congen.go | 36 ++++++++++++++++++------------------ flow/e2e/test_utils.go | 2 +- 3 files changed, 19 insertions(+), 20 deletions(-) diff --git a/.github/workflows/flow.yml b/.github/workflows/flow.yml index 777b23039c..3e3a4a3c77 100644 --- a/.github/workflows/flow.yml +++ b/.github/workflows/flow.yml @@ -126,4 +126,3 @@ jobs: PEERDB_CATALOG_USER: postgres PEERDB_CATALOG_PASSWORD: postgres PEERDB_CATALOG_DATABASE: postgres - PEERDB_CDC_IDLE_TIMEOUT_SECONDS: 10 diff --git a/flow/e2e/congen.go b/flow/e2e/congen.go index c6300301cd..10f91c162f 100644 --- a/flow/e2e/congen.go +++ b/flow/e2e/congen.go @@ -139,18 +139,16 @@ func TearDownPostgres(pool *pgxpool.Pool, suffix string) error { // drop the e2e_test schema if pool != nil { deadline := time.Now().Add(time.Minute) - var err error for { - err = cleanPostgres(pool, suffix) - if time.Now().Compare(deadline) > 0 { - break + err := cleanPostgres(pool, suffix) + if err == nil { + pool.Close() + return nil + } else if time.Now().After(deadline) { + return err } time.Sleep(time.Second) } - if err != nil { - return err - } - pool.Close() } return nil } @@ -196,7 +194,7 @@ func 
GenerateSnowflakePeer(snowflakeConfig *protos.SnowflakeConfig) (*protos.Pee return ret, nil } -func (c *FlowConnectionGenerationConfig) GenerateFlowConnectionConfigs() (*protos.FlowConnectionConfigs, error) { +func (c *FlowConnectionGenerationConfig) GenerateFlowConnectionConfigs() *protos.FlowConnectionConfigs { tblMappings := []*protos.TableMapping{} for k, v := range c.TableNameMapping { tblMappings = append(tblMappings, &protos.TableMapping{ @@ -205,18 +203,20 @@ func (c *FlowConnectionGenerationConfig) GenerateFlowConnectionConfigs() (*proto }) } - ret := &protos.FlowConnectionConfigs{} - ret.FlowJobName = c.FlowJobName - ret.TableMappings = tblMappings - ret.Source = GeneratePostgresPeer(c.PostgresPort) - ret.Destination = c.Destination - ret.CdcStagingPath = c.CdcStagingPath - ret.SoftDelete = c.SoftDelete + ret := &protos.FlowConnectionConfigs{ + FlowJobName: c.FlowJobName, + TableMappings: tblMappings, + Source: GeneratePostgresPeer(c.PostgresPort), + Destination: c.Destination, + CdcStagingPath: c.CdcStagingPath, + SoftDelete: c.SoftDelete, + SyncedAtColName: "_PEERDB_SYNCED_AT", + IdleTimeoutSeconds: 30, + } if ret.SoftDelete { ret.SoftDeleteColName = "_PEERDB_IS_DELETED" } - ret.SyncedAtColName = "_PEERDB_SYNCED_AT" - return ret, nil + return ret } type QRepFlowConnectionGenerationConfig struct { diff --git a/flow/e2e/test_utils.go b/flow/e2e/test_utils.go index 62636dd526..4aa058c06f 100644 --- a/flow/e2e/test_utils.go +++ b/flow/e2e/test_utils.go @@ -543,7 +543,7 @@ func EnvWaitFor(t *testing.T, env *testsuite.TestWorkflowEnvironment, timeout ti deadline, _ := ctx.Deadline() t.Log("WaitFor", reason) for !f(ctx) { - if time.Now().Compare(deadline) >= 0 { + if time.Now().After(deadline) { t.Error("UNEXPECTED TIMEOUT", reason) env.CancelWorkflow() runtime.Goexit() From ae100424874c76be9a9aa9cf2f7fbfa2e04ea931 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Sat, 6 Jan 2024 15:13:41 +0000 Subject: [PATCH 54/67] test reverting 
WaitForCancellation: true --- flow/workflows/cdc_flow.go | 14 +++++--------- flow/workflows/sync_flow.go | 3 --- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/flow/workflows/cdc_flow.go b/flow/workflows/cdc_flow.go index 35a2f2c613..2b6e78aee3 100644 --- a/flow/workflows/cdc_flow.go +++ b/flow/workflows/cdc_flow.go @@ -216,8 +216,7 @@ func CDCFlowWorkflowWithConfig( RetryPolicy: &temporal.RetryPolicy{ MaximumAttempts: 20, }, - SearchAttributes: mirrorNameSearch, - WaitForCancellation: true, + SearchAttributes: mirrorNameSearch, } setupFlowCtx := workflow.WithChildOptions(ctx, childSetupFlowOpts) setupFlowFuture := workflow.ExecuteChildWorkflow(setupFlowCtx, SetupFlowWorkflow, cfg) @@ -243,9 +242,8 @@ func CDCFlowWorkflowWithConfig( RetryPolicy: &temporal.RetryPolicy{ MaximumAttempts: 20, }, - TaskQueue: taskQueue, - SearchAttributes: mirrorNameSearch, - WaitForCancellation: true, + TaskQueue: taskQueue, + SearchAttributes: mirrorNameSearch, } snapshotFlowCtx := workflow.WithChildOptions(ctx, childSnapshotFlowOpts) snapshotFlowFuture := workflow.ExecuteChildWorkflow(snapshotFlowCtx, SnapshotFlowWorkflow, cfg) @@ -394,8 +392,7 @@ func CDCFlowWorkflowWithConfig( RetryPolicy: &temporal.RetryPolicy{ MaximumAttempts: 20, }, - SearchAttributes: mirrorNameSearch, - WaitForCancellation: true, + SearchAttributes: mirrorNameSearch, } syncCtx := workflow.WithChildOptions(ctx, childSyncFlowOpts) syncFlowOptions.RelationMessageMapping = state.RelationMessageMapping @@ -467,8 +464,7 @@ func CDCFlowWorkflowWithConfig( RetryPolicy: &temporal.RetryPolicy{ MaximumAttempts: 20, }, - SearchAttributes: mirrorNameSearch, - WaitForCancellation: true, + SearchAttributes: mirrorNameSearch, } normCtx := workflow.WithChildOptions(ctx, childNormalizeFlowOpts) childNormalizeFlowFuture := workflow.ExecuteChildWorkflow( diff --git a/flow/workflows/sync_flow.go b/flow/workflows/sync_flow.go index 0b82ca4c22..09849b1752 100644 --- a/flow/workflows/sync_flow.go +++ 
b/flow/workflows/sync_flow.go @@ -41,7 +41,6 @@ func (s *SyncFlowExecution) executeSyncFlow( syncMetaCtx := workflow.WithActivityOptions(ctx, workflow.ActivityOptions{ StartToCloseTimeout: 1 * time.Minute, - WaitForCancellation: true, }) // execute GetLastSyncedID on destination peer @@ -66,7 +65,6 @@ func (s *SyncFlowExecution) executeSyncFlow( startFlowCtx := workflow.WithActivityOptions(ctx, workflow.ActivityOptions{ StartToCloseTimeout: 72 * time.Hour, HeartbeatTimeout: 30 * time.Second, - WaitForCancellation: true, }) // execute StartFlow on the peers to start the flow @@ -85,7 +83,6 @@ func (s *SyncFlowExecution) executeSyncFlow( replayTableSchemaDeltaCtx := workflow.WithActivityOptions(ctx, workflow.ActivityOptions{ StartToCloseTimeout: 30 * time.Minute, - WaitForCancellation: true, }) replayTableSchemaInput := &protos.ReplayTableSchemaDeltaInput{ FlowConnectionConfigs: config, From 6b783ff0d7f5e83dc72f273fac975cf9d9650eff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Sat, 6 Jan 2024 15:28:55 +0000 Subject: [PATCH 55/67] fix type errors --- flow/e2e/bigquery/peer_flow_bq_test.go | 89 ++++++++++--------------- flow/e2e/postgres/peer_flow_pg_test.go | 38 +++++------ flow/e2e/s3/cdc_s3_test.go | 6 +- flow/e2e/snowflake/peer_flow_sf_test.go | 85 ++++++++++------------- flow/e2e/test_utils.go | 11 +-- 5 files changed, 96 insertions(+), 133 deletions(-) diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go index 71230b05f8..8884508449 100644 --- a/flow/e2e/bigquery/peer_flow_bq_test.go +++ b/flow/e2e/bigquery/peer_flow_bq_test.go @@ -232,8 +232,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Complete_Flow_No_Data() { CdcStagingPath: "", } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: 0, @@ -274,8 +273,7 @@ func (s PeerFlowE2ETestSuiteBQ) 
Test_Char_ColType_Error() { CdcStagingPath: "", } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: 0, @@ -319,8 +317,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Complete_Simple_Flow_BQ() { CdcStagingPath: "", } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: 10, @@ -330,7 +327,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Complete_Simple_Flow_BQ() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert 10 rows into the source table go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) // insert 10 rows into the source table for i := 0; i < 10; i++ { testKey := fmt.Sprintf("test_key_%d", i) @@ -385,8 +382,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Toast_BQ() { CdcStagingPath: "", } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: 4, @@ -396,7 +392,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Toast_BQ() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and execute a transaction touching toast columns go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) /* Executing a transaction which 1. 
changes both toast column @@ -452,8 +448,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Toast_Nochanges_BQ() { CdcStagingPath: "", } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: 0, @@ -464,7 +459,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Toast_Nochanges_BQ() { // and execute a transaction touching toast columns done := make(chan struct{}) go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) /* transaction updating no rows */ _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` BEGIN; @@ -515,8 +510,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_1_BQ() { CdcStagingPath: "", } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: 11, @@ -526,7 +520,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_1_BQ() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and execute a transaction touching toast columns go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) // complex transaction with random DMLs on a table with toast columns _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` BEGIN; @@ -587,8 +581,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_2_BQ() { CdcStagingPath: "", } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: 6, @@ -598,7 +591,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_2_BQ() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and execute a 
transaction touching toast columns go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) // complex transaction with random DMLs on a table with toast columns _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` BEGIN; @@ -654,8 +647,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_3_BQ() { CdcStagingPath: "", } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: 4, @@ -665,7 +657,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_3_BQ() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and execute a transaction touching toast columns go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) /* transaction updating a single row multiple times with changed/unchanged toast columns @@ -720,8 +712,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Types_BQ() { CdcStagingPath: "", } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: 1, @@ -731,7 +722,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Types_BQ() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and execute a transaction touching toast columns go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) /* test inserting various types*/ _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s SELECT 2,2,b'1',b'101', @@ -800,8 +791,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Invalid_Geo_BQ_Avro_CDC() { CdcStagingPath: "", } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := 
connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: 10, @@ -811,7 +801,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Invalid_Geo_BQ_Avro_CDC() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert 10 rows into the source table go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) // insert 4 invalid shapes and 6 valid shapes into the source table for i := 0; i < 4; i++ { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` @@ -881,8 +871,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Multi_Table_BQ() { CdcStagingPath: "", } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: 2, @@ -892,7 +881,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Multi_Table_BQ() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and execute a transaction touching toast columns go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) /* inserting across multiple tables*/ _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s (c1,c2) VALUES (1,'dummy_1'); @@ -941,8 +930,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Simple_Schema_Changes_BQ() { CdcStagingPath: "", } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: -1, @@ -953,7 +941,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Simple_Schema_Changes_BQ() { // and then insert and mutate schema repeatedly. go func() { // insert first row. 
- e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1) VALUES (1)`, srcTableName)) e2e.EnvNoError(s.t, env, err) @@ -1032,8 +1020,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_BQ() { CdcStagingPath: "", } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: -1, @@ -1043,7 +1030,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_BQ() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) // insert 10 rows into the source table for i := 0; i < 10; i++ { testValue := fmt.Sprintf("test_value_%d", i) @@ -1098,8 +1085,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_Toast_1_BQ() { CdcStagingPath: "", } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: 20, @@ -1109,7 +1095,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_Toast_1_BQ() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. 
go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) rowsTx, err := s.pool.Begin(context.Background()) e2e.EnvNoError(s.t, env, err) @@ -1173,8 +1159,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_Toast_2_BQ() { CdcStagingPath: "", } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: -1, @@ -1184,7 +1169,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_Toast_2_BQ() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) // insert 10 rows into the source table for i := 0; i < 10; i++ { @@ -1233,8 +1218,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Columns_BQ() { SoftDelete: true, } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: 2, @@ -1242,7 +1226,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Columns_BQ() { } go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) // insert 1 row into the source table testKey := fmt.Sprintf("test_key_%d", 1) testValue := fmt.Sprintf("test_value_%d", 1) @@ -1298,8 +1282,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Multi_Table_Multi_Dataset_BQ() { CdcStagingPath: "", } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: 2, @@ -1309,7 +1292,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Multi_Table_Multi_Dataset_BQ() { // in a 
separate goroutine, wait for PeerFlowStatusQuery to finish setup // and execute a transaction touching toast columns go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) /* inserting across multiple tables*/ _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s (c1,c2) VALUES (1,'dummy_1'); @@ -1383,7 +1366,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Basic() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) @@ -1469,7 +1452,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_IUD_Same_Batch() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) insertTx, err := s.pool.Begin(context.Background()) e2e.EnvNoError(s.t, env, err) @@ -1553,7 +1536,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_UD_Same_Batch() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) @@ -1646,7 +1629,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Insert_After_Delete() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert and delete rows in the table. 
go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) diff --git a/flow/e2e/postgres/peer_flow_pg_test.go b/flow/e2e/postgres/peer_flow_pg_test.go index 73da88436f..b6358fcae5 100644 --- a/flow/e2e/postgres/peer_flow_pg_test.go +++ b/flow/e2e/postgres/peer_flow_pg_test.go @@ -65,8 +65,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Flow_PG() { Destination: s.peer, } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: 10, @@ -76,7 +75,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Flow_PG() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert 10 rows into the source table go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) // insert 10 rows into the source table for i := 0; i < 10; i++ { testKey := fmt.Sprintf("test_key_%d", i) @@ -147,8 +146,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Schema_Changes_PG() { Destination: s.peer, } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: -1, @@ -159,7 +157,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Schema_Changes_PG() { // and then insert and mutate schema repeatedly. go func() { // insert first row. 
- e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1) VALUES ($1)`, srcTableName), 1) e2e.EnvNoError(s.t, env, err) @@ -283,8 +281,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_PG() { Destination: s.peer, } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: -1, @@ -294,7 +291,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_PG() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) // insert 10 rows into the source table for i := 0; i < 10; i++ { testValue := fmt.Sprintf("test_value_%d", i) @@ -353,8 +350,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_Toast_1_PG() { Destination: s.peer, } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: 20, @@ -364,7 +360,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_Toast_1_PG() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. 
go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) rowsTx, err := s.pool.Begin(context.Background()) e2e.EnvNoError(s.t, env, err) @@ -432,8 +428,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_Toast_2_PG() { Destination: s.peer, } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: -1, @@ -443,7 +438,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_Toast_2_PG() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) // insert 10 rows into the source table for i := 0; i < 10; i++ { @@ -498,8 +493,7 @@ func (s PeerFlowE2ETestSuitePG) Test_PeerDB_Columns() { SoftDelete: true, } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: 2, @@ -507,7 +501,7 @@ func (s PeerFlowE2ETestSuitePG) Test_PeerDB_Columns() { } go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) // insert 1 row into the source table testKey := fmt.Sprintf("test_key_%d", 1) testValue := fmt.Sprintf("test_value_%d", 1) @@ -582,7 +576,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_Basic() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. 
go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) @@ -672,7 +666,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_IUD_Same_Batch() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) insertTx, err := s.pool.Begin(context.Background()) e2e.EnvNoError(s.t, env, err) @@ -757,7 +751,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_UD_Same_Batch() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) @@ -844,7 +838,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_Insert_After_Delete() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert and delete rows in the table. 
go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) diff --git a/flow/e2e/s3/cdc_s3_test.go b/flow/e2e/s3/cdc_s3_test.go index 4c5d8feb94..b26e44ad25 100644 --- a/flow/e2e/s3/cdc_s3_test.go +++ b/flow/e2e/s3/cdc_s3_test.go @@ -40,8 +40,7 @@ func (s PeerFlowE2ETestSuiteS3) Test_Complete_Simple_Flow_S3() { Destination: s.s3Helper.GetPeer(), } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ TotalSyncFlows: 4, @@ -50,8 +49,7 @@ func (s PeerFlowE2ETestSuiteS3) Test_Complete_Simple_Flow_S3() { } go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) - e2e.EnvNoError(s.t, env, err) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) // insert 20 rows for i := 1; i <= 20; i++ { testKey := fmt.Sprintf("test_key_%d", i) diff --git a/flow/e2e/snowflake/peer_flow_sf_test.go b/flow/e2e/snowflake/peer_flow_sf_test.go index 0319154dc7..e70beddb0f 100644 --- a/flow/e2e/snowflake/peer_flow_sf_test.go +++ b/flow/e2e/snowflake/peer_flow_sf_test.go @@ -150,8 +150,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_Simple_Flow_SF() { Destination: s.sfHelper.Peer, } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: -1, @@ -161,7 +160,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_Simple_Flow_SF() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert 20 rows into the source table go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) // insert 20 rows into the source 
table for i := 0; i < 20; i++ { testKey := fmt.Sprintf("test_key_%d", i) @@ -216,8 +215,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Flow_ReplicaIdentity_Index_No_Pkey() { Destination: s.sfHelper.Peer, } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: 20, @@ -227,7 +225,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Flow_ReplicaIdentity_Index_No_Pkey() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert 20 rows into the source table go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) // insert 20 rows into the source table for i := 0; i < 20; i++ { testKey := fmt.Sprintf("test_key_%d", i) @@ -278,8 +276,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Invalid_Geo_SF_Avro_CDC() { Destination: s.sfHelper.Peer, } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: 10, @@ -289,7 +286,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Invalid_Geo_SF_Avro_CDC() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert 10 rows into the source table go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) // insert 4 invalid shapes and 6 valid shapes into the source table for i := 0; i < 4; i++ { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` @@ -359,8 +356,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Toast_SF() { Destination: s.sfHelper.Peer, } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: 4, 
@@ -370,7 +366,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Toast_SF() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and execute a transaction touching toast columns go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) /* Executing a transaction which 1. changes both toast column @@ -426,8 +422,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Toast_Nochanges_SF() { Destination: s.sfHelper.Peer, } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: 0, @@ -439,7 +434,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Toast_Nochanges_SF() { go func() { defer wg.Done() - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) /* transaction updating no rows */ _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` BEGIN; @@ -487,8 +482,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Toast_Advance_1_SF() { Destination: s.sfHelper.Peer, } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: 11, @@ -498,7 +492,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Toast_Advance_1_SF() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and execute a transaction touching toast columns go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) // complex transaction with random DMLs on a table with toast columns _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` BEGIN; @@ -558,8 +552,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Toast_Advance_2_SF() { Destination: s.sfHelper.Peer, } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - 
require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: 6, @@ -569,7 +562,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Toast_Advance_2_SF() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and execute a transaction touching toast columns go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) // complex transaction with random DMLs on a table with toast columns _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` BEGIN; @@ -624,8 +617,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Toast_Advance_3_SF() { Destination: s.sfHelper.Peer, } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: 4, @@ -635,7 +627,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Toast_Advance_3_SF() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and execute a transaction touching toast columns go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) /* transaction updating a single row multiple times with changed/unchanged toast columns @@ -690,8 +682,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Types_SF() { Destination: s.sfHelper.Peer, } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: 1, @@ -701,7 +692,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Types_SF() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and execute a transaction touching toast columns go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) /* test inserting 
various types*/ _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s SELECT 2,2,b'1',b'101', @@ -769,8 +760,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Multi_Table_SF() { Destination: s.sfHelper.Peer, } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: 2, @@ -780,7 +770,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Multi_Table_SF() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and execute a transaction touching toast columns go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) /* inserting across multiple tables*/ _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s (c1,c2) VALUES (1,'dummy_1'); @@ -826,8 +816,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF() { Destination: s.sfHelper.Peer, } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: -1, @@ -837,7 +826,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert and mutate schema repeatedly. 
go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1) VALUES ($1)`, srcTableName), 1) e2e.EnvNoError(s.t, env, err) @@ -1003,8 +992,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Composite_PKey_SF() { Destination: s.sfHelper.Peer, } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: -1, @@ -1014,7 +1002,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Composite_PKey_SF() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) // insert 10 rows into the source table for i := 0; i < 10; i++ { testValue := fmt.Sprintf("test_value_%d", i) @@ -1066,8 +1054,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_1_SF() { Destination: s.sfHelper.Peer, } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: 20, @@ -1077,7 +1064,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_1_SF() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. 
go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) rowsTx, err := s.pool.Begin(context.Background()) e2e.EnvNoError(s.t, env, err) @@ -1141,8 +1128,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_2_SF() { Destination: s.sfHelper.Peer, } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: -1, @@ -1152,7 +1138,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_2_SF() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) // insert 10 rows into the source table for i := 0; i < 10; i++ { @@ -1225,7 +1211,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Column_Exclusion() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) // insert 10 rows into the source table for i := 0; i < 10; i++ { @@ -1306,7 +1292,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Basic() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. 
go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) @@ -1386,7 +1372,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_IUD_Same_Batch() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) insertTx, err := s.pool.Begin(context.Background()) e2e.EnvNoError(s.t, env, err) @@ -1470,7 +1456,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_UD_Same_Batch() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) @@ -1557,7 +1543,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Insert_After_Delete() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert and delete rows in the table. 
go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) @@ -1616,8 +1602,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Supported_Mixed_Case_Table_SF() { Destination: s.sfHelper.Peer, } - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(s.t, err) + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() limits := peerflow.CDCFlowLimits{ ExitAfterRecords: -1, @@ -1627,7 +1612,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Supported_Mixed_Case_Table_SF() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert 20 rows into the source table go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) // insert 20 rows into the source table for i := 0; i < 20; i++ { testKey := fmt.Sprintf("test_key_%d", i) diff --git a/flow/e2e/test_utils.go b/flow/e2e/test_utils.go index 4aa058c06f..dbc7b6f5cb 100644 --- a/flow/e2e/test_utils.go +++ b/flow/e2e/test_utils.go @@ -167,9 +167,10 @@ func EnvWaitForEqualTablesWithNames( }) } -func SetupCDCFlowStatusQuery(env *testsuite.TestWorkflowEnvironment, +func SetupCDCFlowStatusQuery(t *testing.T, env *testsuite.TestWorkflowEnvironment, connectionGen FlowConnectionGenerationConfig, -) error { +) { + t.Helper() // errors expected while PeerFlowStatusQuery is setup counter := 0 for { @@ -187,10 +188,12 @@ func SetupCDCFlowStatusQuery(env *testsuite.TestWorkflowEnvironment, } if *state.CurrentFlowState == protos.FlowStatus_STATUS_RUNNING { - return nil + return } } else if counter > 15 { - return err + t.Error("UNEXPECTED SETUP CDC TIMEOUT", err.Error()) + env.CancelWorkflow() + runtime.Goexit() } else if counter > 5 { // log the error for informational purposes slog.Error(err.Error()) From 
c95180e69c4e3ebeaa1402f3bf5b5fd19a982e79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Sat, 6 Jan 2024 15:49:39 +0000 Subject: [PATCH 56/67] adjust timeouts, refactor WaitFuncSchema into WaitForSchema --- flow/e2e/bigquery/peer_flow_bq_test.go | 86 ++++++++-------- flow/e2e/postgres/peer_flow_pg_test.go | 136 ++++++++++++------------- flow/e2e/test_utils.go | 10 +- 3 files changed, 110 insertions(+), 122 deletions(-) diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go index 8884508449..3a903e476b 100644 --- a/flow/e2e/bigquery/peer_flow_bq_test.go +++ b/flow/e2e/bigquery/peer_flow_bq_test.go @@ -1379,19 +1379,17 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Basic() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, time.Minute, "normalize delete", - func(ctx context.Context) bool { - pgRows, err := e2e.GetPgRows(s.pool, s.bqSuffix, srcName, "id,c1,c2,t") - if err != nil { - return false - } - rows, err := s.GetRowsWhere(tableName, "id,c1,c2,t", "NOT _PEERDB_IS_DELETED") - if err != nil { - return false - } - return e2eshared.CheckEqualRecordBatches(s.t, pgRows, rows) - }, - ) + e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize delete", func() bool { + pgRows, err := e2e.GetPgRows(s.pool, s.bqSuffix, srcName, "id,c1,c2,t") + if err != nil { + return false + } + rows, err := s.GetRowsWhere(tableName, "id,c1,c2,t", "NOT _PEERDB_IS_DELETED") + if err != nil { + return false + } + return e2eshared.CheckEqualRecordBatches(s.t, pgRows, rows) + }) env.CancelWorkflow() }() @@ -1556,27 +1554,25 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_UD_Same_Batch() { e2e.EnvNoError(s.t, env, err) e2e.EnvNoError(s.t, env, insertTx.Commit(context.Background())) - e2e.EnvWaitFor(s.t, env, time.Minute, "normalize transaction", - func(ctx context.Context) bool { - pgRows, err := e2e.GetPgRows(s.pool, 
s.bqSuffix, srcName, "id,c1,c2,t") - e2e.EnvNoError(s.t, env, err) - rows, err := s.GetRowsWhere(dstName, "id,c1,c2,t", "NOT _PEERDB_IS_DELETED") - if err != nil { - return false - } - if !e2eshared.CheckEqualRecordBatches(s.t, pgRows, rows) { - return false - } + e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize transaction", func() bool { + pgRows, err := e2e.GetPgRows(s.pool, s.bqSuffix, srcName, "id,c1,c2,t") + e2e.EnvNoError(s.t, env, err) + rows, err := s.GetRowsWhere(dstName, "id,c1,c2,t", "NOT _PEERDB_IS_DELETED") + if err != nil { + return false + } + if !e2eshared.CheckEqualRecordBatches(s.t, pgRows, rows) { + return false + } - newerSyncedAtQuery := fmt.Sprintf( - "SELECT COUNT(*) FROM `%s.%s` WHERE _PEERDB_IS_DELETED", - s.bqHelper.Config.DatasetId, dstName) - numNewRows, err := s.bqHelper.RunInt64Query(newerSyncedAtQuery) - e2e.EnvNoError(s.t, env, err) - s.t.Log("waiting on _PEERDB_IS_DELETED to be 1, currently", numNewRows) - return err == nil && numNewRows == 1 - }, - ) + newerSyncedAtQuery := fmt.Sprintf( + "SELECT COUNT(*) FROM `%s.%s` WHERE _PEERDB_IS_DELETED", + s.bqHelper.Config.DatasetId, dstName) + numNewRows, err := s.bqHelper.RunInt64Query(newerSyncedAtQuery) + e2e.EnvNoError(s.t, env, err) + s.t.Log("waiting on _PEERDB_IS_DELETED to be 1, currently", numNewRows) + return err == nil && numNewRows == 1 + }) env.CancelWorkflow() }() @@ -1638,19 +1634,17 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Insert_After_Delete() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, time.Minute, "normalize delete", - func(ctx context.Context) bool { - pgRows, err := e2e.GetPgRows(s.pool, s.bqSuffix, tableName, "id,c1,c2,t") - if err != nil { - return false - } - rows, err := s.GetRowsWhere(tableName, "id,c1,c2,t", "NOT _PEERDB_IS_DELETED") - if err != nil { - return false - } - return e2eshared.CheckEqualRecordBatches(s.t, pgRows, rows) - }, 
- ) + e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize delete", func() bool { + pgRows, err := e2e.GetPgRows(s.pool, s.bqSuffix, tableName, "id,c1,c2,t") + if err != nil { + return false + } + rows, err := s.GetRowsWhere(tableName, "id,c1,c2,t", "NOT _PEERDB_IS_DELETED") + if err != nil { + return false + } + return e2eshared.CheckEqualRecordBatches(s.t, pgRows, rows) + }) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(id,c1,c2,t) VALUES (1,3,4,random_string(10000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) diff --git a/flow/e2e/postgres/peer_flow_pg_test.go b/flow/e2e/postgres/peer_flow_pg_test.go index b6358fcae5..b96e3ffdeb 100644 --- a/flow/e2e/postgres/peer_flow_pg_test.go +++ b/flow/e2e/postgres/peer_flow_pg_test.go @@ -11,6 +11,7 @@ import ( peerflow "github.com/PeerDB-io/peer-flow/workflows" "github.com/jackc/pgx/v5/pgtype" "github.com/stretchr/testify/require" + "go.temporal.io/sdk/testsuite" ) func (s PeerFlowE2ETestSuitePG) attachSchemaSuffix(tableName string) string { @@ -101,14 +102,17 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Flow_PG() { require.NoError(s.t, err) } -func WaitFuncSchema( - s PeerFlowE2ETestSuitePG, +func (s PeerFlowE2ETestSuitePG) WaitForSchema( + env *testsuite.TestWorkflowEnvironment, + reason string, srcTableName string, dstTableName string, cols string, expectedSchema *protos.TableSchema, -) func(context.Context) bool { - return func(ctx context.Context) bool { +) { + s.t.Helper() + e2e.EnvWaitFor(s.t, env, 2*time.Minute, reason, func() bool { + s.t.Helper() output, err := s.connector.GetTableSchema(&protos.GetTableSchemaBatchInput{ TableIdentifiers: []string{dstTableName}, }) @@ -121,7 +125,7 @@ func WaitFuncSchema( return false } return s.comparePGTables(srcTableName, dstTableName, cols) == nil - } + }) } func (s PeerFlowE2ETestSuitePG) Test_Simple_Schema_Changes_PG() { @@ -163,18 +167,16 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Schema_Changes_PG() { e2e.EnvNoError(s.t, env, err) 
s.t.Log("Inserted initial row in the source table") - e2e.EnvWaitFor(s.t, env, time.Minute, "normalizing first row", - WaitFuncSchema(s, srcTableName, dstTableName, "id,c1", &protos.TableSchema{ - TableIdentifier: dstTableName, - ColumnNames: []string{"id", "c1", "_PEERDB_SYNCED_AT"}, - ColumnTypes: []string{ - string(qvalue.QValueKindInt64), - string(qvalue.QValueKindInt64), - string(qvalue.QValueKindTimestamp), - }, - PrimaryKeyColumns: []string{"id"}, - }), - ) + s.WaitForSchema(env, "normalizing first row", srcTableName, dstTableName, "id,c1", &protos.TableSchema{ + TableIdentifier: dstTableName, + ColumnNames: []string{"id", "c1", "_PEERDB_SYNCED_AT"}, + ColumnTypes: []string{ + string(qvalue.QValueKindInt64), + string(qvalue.QValueKindInt64), + string(qvalue.QValueKindTimestamp), + }, + PrimaryKeyColumns: []string{"id"}, + }) // alter source table, add column c2 and insert another row. _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` @@ -186,19 +188,17 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Schema_Changes_PG() { e2e.EnvNoError(s.t, env, err) s.t.Log("Inserted row with added c2 in the source table") - e2e.EnvWaitFor(s.t, env, time.Minute, "normalizing altered row", - WaitFuncSchema(s, srcTableName, dstTableName, "id,c1,c2", &protos.TableSchema{ - TableIdentifier: dstTableName, - ColumnNames: []string{"id", "c1", "_PEERDB_SYNCED_AT", "c2"}, - ColumnTypes: []string{ - string(qvalue.QValueKindInt64), - string(qvalue.QValueKindInt64), - string(qvalue.QValueKindTimestamp), - string(qvalue.QValueKindInt64), - }, - PrimaryKeyColumns: []string{"id"}, - }), - ) + s.WaitForSchema(env, "normalizing altered row", srcTableName, dstTableName, "id,c1,c2", &protos.TableSchema{ + TableIdentifier: dstTableName, + ColumnNames: []string{"id", "c1", "_PEERDB_SYNCED_AT", "c2"}, + ColumnTypes: []string{ + string(qvalue.QValueKindInt64), + string(qvalue.QValueKindInt64), + string(qvalue.QValueKindTimestamp), + string(qvalue.QValueKindInt64), + }, + PrimaryKeyColumns: 
[]string{"id"}, + }) // alter source table, add column c3, drop column c2 and insert another row. _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` @@ -210,20 +210,18 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Schema_Changes_PG() { e2e.EnvNoError(s.t, env, err) s.t.Log("Inserted row with added c3 in the source table") - e2e.EnvWaitFor(s.t, env, time.Minute, "normalizing dropped column row", - WaitFuncSchema(s, srcTableName, dstTableName, "id,c1,c3", &protos.TableSchema{ - TableIdentifier: dstTableName, - ColumnNames: []string{"id", "c1", "_PEERDB_SYNCED_AT", "c2", "c3"}, - ColumnTypes: []string{ - string(qvalue.QValueKindInt64), - string(qvalue.QValueKindInt64), - string(qvalue.QValueKindTimestamp), - string(qvalue.QValueKindInt64), - string(qvalue.QValueKindInt64), - }, - PrimaryKeyColumns: []string{"id"}, - }), - ) + s.WaitForSchema(env, "normalizing dropped column row", srcTableName, dstTableName, "id,c1,c3", &protos.TableSchema{ + TableIdentifier: dstTableName, + ColumnNames: []string{"id", "c1", "_PEERDB_SYNCED_AT", "c2", "c3"}, + ColumnTypes: []string{ + string(qvalue.QValueKindInt64), + string(qvalue.QValueKindInt64), + string(qvalue.QValueKindTimestamp), + string(qvalue.QValueKindInt64), + string(qvalue.QValueKindInt64), + }, + PrimaryKeyColumns: []string{"id"}, + }) // alter source table, drop column c3 and insert another row. 
_, err = s.pool.Exec(context.Background(), fmt.Sprintf(` @@ -235,20 +233,18 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Schema_Changes_PG() { e2e.EnvNoError(s.t, env, err) s.t.Log("Inserted row after dropping all columns in the source table") - e2e.EnvWaitFor(s.t, env, time.Minute, "normalizing 2nd dropped column row", - WaitFuncSchema(s, srcTableName, dstTableName, "id,c1", &protos.TableSchema{ - TableIdentifier: dstTableName, - ColumnNames: []string{"id", "c1", "_PEERDB_SYNCED_AT", "c2", "c3"}, - ColumnTypes: []string{ - string(qvalue.QValueKindInt64), - string(qvalue.QValueKindInt64), - string(qvalue.QValueKindTimestamp), - string(qvalue.QValueKindInt64), - string(qvalue.QValueKindInt64), - }, - PrimaryKeyColumns: []string{"id"}, - }), - ) + s.WaitForSchema(env, "normalizing 2nd dropped column row", srcTableName, dstTableName, "id,c1", &protos.TableSchema{ + TableIdentifier: dstTableName, + ColumnNames: []string{"id", "c1", "_PEERDB_SYNCED_AT", "c2", "c3"}, + ColumnTypes: []string{ + string(qvalue.QValueKindInt64), + string(qvalue.QValueKindInt64), + string(qvalue.QValueKindTimestamp), + string(qvalue.QValueKindInt64), + string(qvalue.QValueKindInt64), + }, + PrimaryKeyColumns: []string{"id"}, + }) env.CancelWorkflow() }() @@ -302,7 +298,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_PG() { } s.t.Log("Inserted 10 rows into the source table") - e2e.EnvWaitFor(s.t, env, time.Minute, "normalize 10 rows", func(ctx context.Context) bool { + e2e.EnvWaitFor(s.t, env, time.Minute, "normalize 10 rows", func() bool { return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil }) @@ -311,7 +307,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_PG() { e2e.EnvNoError(s.t, env, err) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, time.Minute, "normalize modifications", func(ctx context.Context) bool { + e2e.EnvWaitFor(s.t, 
env, time.Minute, "normalize modifications", func() bool { return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil }) env.CancelWorkflow() @@ -450,7 +446,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_Toast_2_PG() { } s.t.Log("Inserted 10 rows into the source table") - e2e.EnvWaitFor(s.t, env, time.Minute, "normalize 10 rows", func(ctx context.Context) bool { + e2e.EnvWaitFor(s.t, env, time.Minute, "normalize 10 rows", func() bool { return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t,t2") == nil }) _, err = s.pool.Exec(context.Background(), @@ -459,7 +455,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_Toast_2_PG() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, time.Minute, "normalize update", func(ctx context.Context) bool { + e2e.EnvWaitFor(s.t, env, time.Minute, "normalize update", func() bool { return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t,t2") == nil }) @@ -581,13 +577,13 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_Basic() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, time.Minute, "normalize row", func(ctx context.Context) bool { + e2e.EnvWaitFor(s.t, env, time.Minute, "normalize row", func() bool { return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil }) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` UPDATE %s SET c1=c1+4 WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, time.Minute, "normalize update", func(ctx context.Context) bool { + e2e.EnvWaitFor(s.t, env, time.Minute, "normalize update", func() bool { return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil }) // since we delete stuff, create another table to compare with @@ -598,7 
+594,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_Basic() { DELETE FROM %s WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, time.Minute, "normalize delete", func(ctx context.Context) bool { + e2e.EnvWaitFor(s.t, env, time.Minute, "normalize delete", func() bool { return s.comparePGTables(srcTableName, dstTableName+` WHERE NOT "_PEERDB_IS_DELETED"`, "id,c1,c2,t") == nil }) @@ -756,7 +752,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_UD_Same_Batch() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, time.Minute, "normalize row", func(ctx context.Context) bool { + e2e.EnvWaitFor(s.t, env, time.Minute, "normalize row", func() bool { return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil }) @@ -773,7 +769,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_UD_Same_Batch() { e2e.EnvNoError(s.t, env, err) e2e.EnvNoError(s.t, env, insertTx.Commit(context.Background())) - e2e.EnvWaitFor(s.t, env, time.Minute, "normalize transaction", func(ctx context.Context) bool { + e2e.EnvWaitFor(s.t, env, time.Minute, "normalize transaction", func() bool { return s.comparePGTables(srcTableName, dstTableName+` WHERE NOT "_PEERDB_IS_DELETED"`, "id,c1,c2,t") == nil }) @@ -843,19 +839,19 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_Insert_After_Delete() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, time.Minute, "normalize row", func(ctx context.Context) bool { + e2e.EnvWaitFor(s.t, env, time.Minute, "normalize row", func() bool { return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil }) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) 
- e2e.EnvWaitFor(s.t, env, time.Minute, "normalize delete", func(ctx context.Context) bool { + e2e.EnvWaitFor(s.t, env, time.Minute, "normalize delete", func() bool { return s.comparePGTables(srcTableName, dstTableName+` WHERE NOT "_PEERDB_IS_DELETED"`, "id,c1,c2,t") == nil }) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(id,c1,c2,t) VALUES (1,3,4,random_string(10000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, time.Minute, "normalize reinsert", func(ctx context.Context) bool { + e2e.EnvWaitFor(s.t, env, time.Minute, "normalize reinsert", func() bool { return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil }) diff --git a/flow/e2e/test_utils.go b/flow/e2e/test_utils.go index dbc7b6f5cb..95e5a8c66f 100644 --- a/flow/e2e/test_utils.go +++ b/flow/e2e/test_utils.go @@ -148,7 +148,7 @@ func EnvWaitForEqualTablesWithNames( t := suite.T() t.Helper() - EnvWaitFor(t, env, 2*time.Minute, reason, func(ctx context.Context) bool { + EnvWaitFor(t, env, 2*time.Minute, reason, func() bool { t.Helper() suffix := suite.Suffix() @@ -538,14 +538,12 @@ func EnvEqualRecordBatches(t *testing.T, env *testsuite.TestWorkflowEnvironment, EnvTrue(t, env, e2eshared.CheckEqualRecordBatches(t, q, other)) } -func EnvWaitFor(t *testing.T, env *testsuite.TestWorkflowEnvironment, timeout time.Duration, reason string, f func(ctx context.Context) bool) { +func EnvWaitFor(t *testing.T, env *testsuite.TestWorkflowEnvironment, timeout time.Duration, reason string, f func() bool) { t.Helper() - ctx, cleanup := context.WithTimeout(context.Background(), timeout) - defer cleanup() - deadline, _ := ctx.Deadline() + deadline := time.Now().Add(timeout) t.Log("WaitFor", reason) - for !f(ctx) { + for !f() { if time.Now().After(deadline) { t.Error("UNEXPECTED TIMEOUT", reason) env.CancelWorkflow() From 2ea6dbd8a5b1a419d1ff21c55a35fd0a544b747b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Sat, 6 Jan 2024 15:50:21 +0000 
Subject: [PATCH 57/67] Revert "test reverting WaitForCancellation: true" This reverts commit ae100424874c76be9a9aa9cf2f7fbfa2e04ea931. --- flow/workflows/cdc_flow.go | 14 +++++++++----- flow/workflows/sync_flow.go | 3 +++ 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/flow/workflows/cdc_flow.go b/flow/workflows/cdc_flow.go index 2b6e78aee3..35a2f2c613 100644 --- a/flow/workflows/cdc_flow.go +++ b/flow/workflows/cdc_flow.go @@ -216,7 +216,8 @@ func CDCFlowWorkflowWithConfig( RetryPolicy: &temporal.RetryPolicy{ MaximumAttempts: 20, }, - SearchAttributes: mirrorNameSearch, + SearchAttributes: mirrorNameSearch, + WaitForCancellation: true, } setupFlowCtx := workflow.WithChildOptions(ctx, childSetupFlowOpts) setupFlowFuture := workflow.ExecuteChildWorkflow(setupFlowCtx, SetupFlowWorkflow, cfg) @@ -242,8 +243,9 @@ func CDCFlowWorkflowWithConfig( RetryPolicy: &temporal.RetryPolicy{ MaximumAttempts: 20, }, - TaskQueue: taskQueue, - SearchAttributes: mirrorNameSearch, + TaskQueue: taskQueue, + SearchAttributes: mirrorNameSearch, + WaitForCancellation: true, } snapshotFlowCtx := workflow.WithChildOptions(ctx, childSnapshotFlowOpts) snapshotFlowFuture := workflow.ExecuteChildWorkflow(snapshotFlowCtx, SnapshotFlowWorkflow, cfg) @@ -392,7 +394,8 @@ func CDCFlowWorkflowWithConfig( RetryPolicy: &temporal.RetryPolicy{ MaximumAttempts: 20, }, - SearchAttributes: mirrorNameSearch, + SearchAttributes: mirrorNameSearch, + WaitForCancellation: true, } syncCtx := workflow.WithChildOptions(ctx, childSyncFlowOpts) syncFlowOptions.RelationMessageMapping = state.RelationMessageMapping @@ -464,7 +467,8 @@ func CDCFlowWorkflowWithConfig( RetryPolicy: &temporal.RetryPolicy{ MaximumAttempts: 20, }, - SearchAttributes: mirrorNameSearch, + SearchAttributes: mirrorNameSearch, + WaitForCancellation: true, } normCtx := workflow.WithChildOptions(ctx, childNormalizeFlowOpts) childNormalizeFlowFuture := workflow.ExecuteChildWorkflow( diff --git a/flow/workflows/sync_flow.go 
b/flow/workflows/sync_flow.go index 09849b1752..0b82ca4c22 100644 --- a/flow/workflows/sync_flow.go +++ b/flow/workflows/sync_flow.go @@ -41,6 +41,7 @@ func (s *SyncFlowExecution) executeSyncFlow( syncMetaCtx := workflow.WithActivityOptions(ctx, workflow.ActivityOptions{ StartToCloseTimeout: 1 * time.Minute, + WaitForCancellation: true, }) // execute GetLastSyncedID on destination peer @@ -65,6 +66,7 @@ func (s *SyncFlowExecution) executeSyncFlow( startFlowCtx := workflow.WithActivityOptions(ctx, workflow.ActivityOptions{ StartToCloseTimeout: 72 * time.Hour, HeartbeatTimeout: 30 * time.Second, + WaitForCancellation: true, }) // execute StartFlow on the peers to start the flow @@ -83,6 +85,7 @@ func (s *SyncFlowExecution) executeSyncFlow( replayTableSchemaDeltaCtx := workflow.WithActivityOptions(ctx, workflow.ActivityOptions{ StartToCloseTimeout: 30 * time.Minute, + WaitForCancellation: true, }) replayTableSchemaInput := &protos.ReplayTableSchemaDeltaInput{ FlowConnectionConfigs: config, From 4056b6279bde7635d4a1330f4280d8a137c8222d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Sat, 6 Jan 2024 15:59:38 +0000 Subject: [PATCH 58/67] 30->15, raise pg timeout to 2m --- flow/e2e/congen.go | 2 +- flow/e2e/postgres/peer_flow_pg_test.go | 24 ++++++++++++------------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/flow/e2e/congen.go b/flow/e2e/congen.go index 10f91c162f..709decfeac 100644 --- a/flow/e2e/congen.go +++ b/flow/e2e/congen.go @@ -211,7 +211,7 @@ func (c *FlowConnectionGenerationConfig) GenerateFlowConnectionConfigs() *protos CdcStagingPath: c.CdcStagingPath, SoftDelete: c.SoftDelete, SyncedAtColName: "_PEERDB_SYNCED_AT", - IdleTimeoutSeconds: 30, + IdleTimeoutSeconds: 15, } if ret.SoftDelete { ret.SoftDeleteColName = "_PEERDB_IS_DELETED" diff --git a/flow/e2e/postgres/peer_flow_pg_test.go b/flow/e2e/postgres/peer_flow_pg_test.go index b96e3ffdeb..00a71af3d0 100644 --- a/flow/e2e/postgres/peer_flow_pg_test.go +++ 
b/flow/e2e/postgres/peer_flow_pg_test.go @@ -298,7 +298,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_PG() { } s.t.Log("Inserted 10 rows into the source table") - e2e.EnvWaitFor(s.t, env, time.Minute, "normalize 10 rows", func() bool { + e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize 10 rows", func() bool { return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil }) @@ -307,7 +307,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_PG() { e2e.EnvNoError(s.t, env, err) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, time.Minute, "normalize modifications", func() bool { + e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize modifications", func() bool { return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil }) env.CancelWorkflow() @@ -446,7 +446,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_Toast_2_PG() { } s.t.Log("Inserted 10 rows into the source table") - e2e.EnvWaitFor(s.t, env, time.Minute, "normalize 10 rows", func() bool { + e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize 10 rows", func() bool { return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t,t2") == nil }) _, err = s.pool.Exec(context.Background(), @@ -455,7 +455,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_Toast_2_PG() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, time.Minute, "normalize update", func() bool { + e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize update", func() bool { return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t,t2") == nil }) @@ -577,13 +577,13 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_Basic() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) 
e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, time.Minute, "normalize row", func() bool { + e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize row", func() bool { return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil }) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` UPDATE %s SET c1=c1+4 WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, time.Minute, "normalize update", func() bool { + e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize update", func() bool { return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil }) // since we delete stuff, create another table to compare with @@ -594,7 +594,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_Basic() { DELETE FROM %s WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, time.Minute, "normalize delete", func() bool { + e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize delete", func() bool { return s.comparePGTables(srcTableName, dstTableName+` WHERE NOT "_PEERDB_IS_DELETED"`, "id,c1,c2,t") == nil }) @@ -752,7 +752,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_UD_Same_Batch() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, time.Minute, "normalize row", func() bool { + e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize row", func() bool { return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil }) @@ -769,7 +769,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_UD_Same_Batch() { e2e.EnvNoError(s.t, env, err) e2e.EnvNoError(s.t, env, insertTx.Commit(context.Background())) - e2e.EnvWaitFor(s.t, env, time.Minute, "normalize transaction", func() bool { + e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize transaction", func() bool { return s.comparePGTables(srcTableName, dstTableName+` WHERE NOT "_PEERDB_IS_DELETED"`, 
"id,c1,c2,t") == nil }) @@ -839,19 +839,19 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_Insert_After_Delete() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, time.Minute, "normalize row", func() bool { + e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize row", func() bool { return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil }) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, time.Minute, "normalize delete", func() bool { + e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize delete", func() bool { return s.comparePGTables(srcTableName, dstTableName+` WHERE NOT "_PEERDB_IS_DELETED"`, "id,c1,c2,t") == nil }) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(id,c1,c2,t) VALUES (1,3,4,random_string(10000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, time.Minute, "normalize reinsert", func() bool { + e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize reinsert", func() bool { return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil }) From 4ce1f80eecaadf86c897523dcbe9c75f920b7a07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Sat, 6 Jan 2024 16:14:47 +0000 Subject: [PATCH 59/67] not sure why this delete is finnicky --- flow/e2e/bigquery/peer_flow_bq_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go index 3a903e476b..ec9c987247 100644 --- a/flow/e2e/bigquery/peer_flow_bq_test.go +++ b/flow/e2e/bigquery/peer_flow_bq_test.go @@ -1555,6 +1555,9 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_UD_Same_Batch() { e2e.EnvNoError(s.t, env, insertTx.Commit(context.Background())) e2e.EnvWaitFor(s.t, env, 2*time.Minute, 
"normalize transaction", func() bool { + rows2, _ := s.GetRows(dstName, "id,c1,c2,_PEERDB_IS_DELETED") + s.t.Log("ROWS", rows2) + pgRows, err := e2e.GetPgRows(s.pool, s.bqSuffix, srcName, "id,c1,c2,t") e2e.EnvNoError(s.t, env, err) rows, err := s.GetRowsWhere(dstName, "id,c1,c2,t", "NOT _PEERDB_IS_DELETED") From 21fd48058f0f484c7cc4272da51f8212130b64ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Sat, 6 Jan 2024 16:58:09 +0000 Subject: [PATCH 60/67] remove logging, move count check out of wait for --- flow/e2e/bigquery/peer_flow_bq_test.go | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go index ec9c987247..97ecc65fac 100644 --- a/flow/e2e/bigquery/peer_flow_bq_test.go +++ b/flow/e2e/bigquery/peer_flow_bq_test.go @@ -1480,8 +1480,8 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_IUD_Same_Batch() { // verify our updates and delete happened e2e.RequireEqualTables(s, "test_softdel_iud", "id,c1,c2,t") - newerSyncedAtQuery := fmt.Sprintf(` - SELECT COUNT(*) FROM`+"`%s.%s`"+`WHERE _PEERDB_IS_DELETED`, + newerSyncedAtQuery := fmt.Sprintf( + "SELECT COUNT(*) FROM `%s.%s` WHERE _PEERDB_IS_DELETED", s.bqHelper.Config.DatasetId, dstTableName) numNewRows, err := s.bqHelper.RunInt64Query(newerSyncedAtQuery) require.NoError(s.t, err) @@ -1555,32 +1555,26 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_UD_Same_Batch() { e2e.EnvNoError(s.t, env, insertTx.Commit(context.Background())) e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize transaction", func() bool { - rows2, _ := s.GetRows(dstName, "id,c1,c2,_PEERDB_IS_DELETED") - s.t.Log("ROWS", rows2) - pgRows, err := e2e.GetPgRows(s.pool, s.bqSuffix, srcName, "id,c1,c2,t") e2e.EnvNoError(s.t, env, err) rows, err := s.GetRowsWhere(dstName, "id,c1,c2,t", "NOT _PEERDB_IS_DELETED") if err != nil { return false } - if !e2eshared.CheckEqualRecordBatches(s.t, pgRows, rows) { - return 
false - } - - newerSyncedAtQuery := fmt.Sprintf( - "SELECT COUNT(*) FROM `%s.%s` WHERE _PEERDB_IS_DELETED", - s.bqHelper.Config.DatasetId, dstName) - numNewRows, err := s.bqHelper.RunInt64Query(newerSyncedAtQuery) - e2e.EnvNoError(s.t, env, err) - s.t.Log("waiting on _PEERDB_IS_DELETED to be 1, currently", numNewRows) - return err == nil && numNewRows == 1 + return e2eshared.CheckEqualRecordBatches(s.t, pgRows, rows) }) env.CancelWorkflow() }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, config, &limits, nil) + + newerSyncedAtQuery := fmt.Sprintf( + "SELECT COUNT(*) FROM `%s.%s` WHERE _PEERDB_IS_DELETED", + s.bqHelper.Config.DatasetId, dstName) + numNewRows, err := s.bqHelper.RunInt64Query(newerSyncedAtQuery) + require.NoError(s.t, err) + require.Equal(s.t, int64(0), numNewRows) } func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Insert_After_Delete() { From 838302ac8e1d4bc12c272ac8ecd4726a906dbc5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Thu, 11 Jan 2024 15:11:40 +0000 Subject: [PATCH 61/67] Fix 2 tests --- flow/e2e/bigquery/peer_flow_bq_test.go | 2 +- flow/e2e/postgres/peer_flow_pg_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go index d1a1b70179..a088fabfb8 100644 --- a/flow/e2e/bigquery/peer_flow_bq_test.go +++ b/flow/e2e/bigquery/peer_flow_bq_test.go @@ -815,7 +815,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_NaN_Doubles_BQ() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and execute a transaction touching toast columns go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) /* test inserting various types*/ _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s SELECT 2, 'NaN'::double precision, '{NaN, Infinity, -Infinity}'; diff --git a/flow/e2e/postgres/peer_flow_pg_test.go 
b/flow/e2e/postgres/peer_flow_pg_test.go index 0c8c06f906..5f10ffee64 100644 --- a/flow/e2e/postgres/peer_flow_pg_test.go +++ b/flow/e2e/postgres/peer_flow_pg_test.go @@ -168,7 +168,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Enums_PG() { } go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(my_mood, my_null_mood) VALUES ('happy',null) `, srcTableName)) From 9f008558da13bb2fadd99cfe93f86b8a0a9e2710 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Thu, 11 Jan 2024 23:04:14 +0000 Subject: [PATCH 62/67] more logging around TearDownPostgres --- flow/connectors/utils/heartbeat.go | 2 +- flow/e2e/bigquery/peer_flow_bq_test.go | 8 ++------ flow/e2e/congen.go | 18 ++++++++++++------ flow/e2e/postgres/qrep_flow_pg_test.go | 17 +++++++++++++---- flow/e2e/s3/qrep_flow_s3_test.go | 19 ++++++++++++++----- flow/e2e/snowflake/peer_flow_sf_test.go | 10 +++------- .../e2e/sqlserver/qrep_flow_sqlserver_test.go | 17 ++++++++++++++--- flow/e2e/test_utils.go | 10 +++++----- flow/e2eshared/e2eshared.go | 6 +++++- 9 files changed, 69 insertions(+), 38 deletions(-) diff --git a/flow/connectors/utils/heartbeat.go b/flow/connectors/utils/heartbeat.go index 270680ded1..fda4ea06d2 100644 --- a/flow/connectors/utils/heartbeat.go +++ b/flow/connectors/utils/heartbeat.go @@ -30,7 +30,7 @@ func HeartbeatRoutine( } } }() - return func() { shutdown <- struct{}{} } + return func() { close(shutdown) } } // if the functions are being called outside the context of a Temporal workflow, diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go index a088fabfb8..6ff99c0d2f 100644 --- a/flow/e2e/bigquery/peer_flow_bq_test.go +++ b/flow/e2e/bigquery/peer_flow_bq_test.go @@ -62,13 +62,9 @@ func (s PeerFlowE2ETestSuiteBQ) GetRowsWhere(tableName string, colsString string func TestPeerFlowE2ETestSuiteBQ(t *testing.T) { 
e2eshared.RunSuite(t, setupSuite, func(s PeerFlowE2ETestSuiteBQ) { - err := e2e.TearDownPostgres(s.pool, s.bqSuffix) - if err != nil { - slog.Error("failed to tear down postgres", slog.Any("error", err)) - s.t.FailNow() - } + e2e.TearDownPostgres(s) - err = s.bqHelper.DropDataset(s.bqHelper.Config.DatasetId) + err := s.bqHelper.DropDataset(s.bqHelper.Config.DatasetId) if err != nil { slog.Error("failed to tear down bigquery", slog.Any("error", err)) s.t.FailNow() diff --git a/flow/e2e/congen.go b/flow/e2e/congen.go index 709decfeac..f324a8e8f5 100644 --- a/flow/e2e/congen.go +++ b/flow/e2e/congen.go @@ -7,10 +7,12 @@ import ( "time" "github.com/PeerDB-io/peer-flow/connectors/utils" + "github.com/PeerDB-io/peer-flow/e2eshared" "github.com/PeerDB-io/peer-flow/generated/protos" "github.com/jackc/pgx/v5" "github.com/jackc/pgx/v5/pgtype" "github.com/jackc/pgx/v5/pgxpool" + "github.com/stretchr/testify/require" ) const ( @@ -135,22 +137,26 @@ func SetupPostgres(suffix string) (*pgxpool.Pool, error) { return pool, nil } -func TearDownPostgres(pool *pgxpool.Pool, suffix string) error { - // drop the e2e_test schema +func TearDownPostgres[T e2eshared.Suite](s T) { + t := s.T() + t.Helper() + pool := s.Pool() + suffix := s.Suffix() + if pool != nil { - deadline := time.Now().Add(time.Minute) + t.Log("begin tearing down postgres schema", suffix) + deadline := time.Now().Add(2 * time.Minute) for { err := cleanPostgres(pool, suffix) if err == nil { pool.Close() - return nil + return } else if time.Now().After(deadline) { - return err + require.Fail(t, "failed to teardown postgres schema", suffix) } time.Sleep(time.Second) } } - return nil } // GeneratePostgresPeer generates a postgres peer config for testing. 
diff --git a/flow/e2e/postgres/qrep_flow_pg_test.go b/flow/e2e/postgres/qrep_flow_pg_test.go index e867edb7e6..a8f65f7ec7 100644 --- a/flow/e2e/postgres/qrep_flow_pg_test.go +++ b/flow/e2e/postgres/qrep_flow_pg_test.go @@ -28,12 +28,21 @@ type PeerFlowE2ETestSuitePG struct { suffix string } +func (s PeerFlowE2ETestSuitePG) T() *testing.T { + return s.t +} + +func (s PeerFlowE2ETestSuitePG) Pool() *pgxpool.Pool { + return s.pool +} + +func (s PeerFlowE2ETestSuitePG) Suffix() string { + return s.suffix +} + func TestPeerFlowE2ETestSuitePG(t *testing.T) { e2eshared.RunSuite(t, SetupSuite, func(s PeerFlowE2ETestSuitePG) { - err := e2e.TearDownPostgres(s.pool, s.suffix) - if err != nil { - require.Fail(s.t, "failed to drop Postgres schema", err) - } + e2e.TearDownPostgres(s) }) } diff --git a/flow/e2e/s3/qrep_flow_s3_test.go b/flow/e2e/s3/qrep_flow_s3_test.go index 6eaa2ebc31..46dd16ef4c 100644 --- a/flow/e2e/s3/qrep_flow_s3_test.go +++ b/flow/e2e/s3/qrep_flow_s3_test.go @@ -24,13 +24,22 @@ type PeerFlowE2ETestSuiteS3 struct { suffix string } +func (s PeerFlowE2ETestSuiteS3) T() *testing.T { + return s.t +} + +func (s PeerFlowE2ETestSuiteS3) Pool() *pgxpool.Pool { + return s.pool +} + +func (s PeerFlowE2ETestSuiteS3) Suffix() string { + return s.suffix +} + func tearDownSuite(s PeerFlowE2ETestSuiteS3) { - err := e2e.TearDownPostgres(s.pool, s.suffix) - if err != nil { - require.Fail(s.t, "failed to drop Postgres schema", err) - } + e2e.TearDownPostgres(s) - err = s.s3Helper.CleanUp() + err := s.s3Helper.CleanUp() if err != nil { require.Fail(s.t, "failed to clean up s3", err) } diff --git a/flow/e2e/snowflake/peer_flow_sf_test.go b/flow/e2e/snowflake/peer_flow_sf_test.go index 6c2dd5510f..056de3a1fb 100644 --- a/flow/e2e/snowflake/peer_flow_sf_test.go +++ b/flow/e2e/snowflake/peer_flow_sf_test.go @@ -57,21 +57,17 @@ func (s PeerFlowE2ETestSuiteSF) GetRows(tableName string, sfSelector string) (*m func TestPeerFlowE2ETestSuiteSF(t *testing.T) { e2eshared.RunSuite(t, 
SetupSuite, func(s PeerFlowE2ETestSuiteSF) { - err := e2e.TearDownPostgres(s.pool, s.pgSuffix) - if err != nil { - slog.Error("failed to tear down Postgres", slog.Any("error", err)) - s.t.FailNow() - } + e2e.TearDownPostgres(s) if s.sfHelper != nil { - err = s.sfHelper.Cleanup() + err := s.sfHelper.Cleanup() if err != nil { slog.Error("failed to tear down Snowflake", slog.Any("error", err)) s.t.FailNow() } } - err = s.connector.Close() + err := s.connector.Close() if err != nil { slog.Error("failed to close Snowflake connector", slog.Any("error", err)) s.t.FailNow() diff --git a/flow/e2e/sqlserver/qrep_flow_sqlserver_test.go b/flow/e2e/sqlserver/qrep_flow_sqlserver_test.go index 25f11dfc44..f7378ea2ae 100644 --- a/flow/e2e/sqlserver/qrep_flow_sqlserver_test.go +++ b/flow/e2e/sqlserver/qrep_flow_sqlserver_test.go @@ -29,13 +29,24 @@ type PeerFlowE2ETestSuiteSQLServer struct { suffix string } +func (s PeerFlowE2ETestSuiteSQLServer) T() *testing.T { + return s.t +} + +func (s PeerFlowE2ETestSuiteSQLServer) Pool() *pgxpool.Pool { + return s.pool +} + +func (s PeerFlowE2ETestSuiteSQLServer) Suffix() string { + return s.suffix +} + func TestCDCFlowE2ETestSuiteSQLServer(t *testing.T) { e2eshared.RunSuite(t, SetupSuite, func(s PeerFlowE2ETestSuiteSQLServer) { - err := e2e.TearDownPostgres(s.pool, s.suffix) - require.NoError(s.t, err) + e2e.TearDownPostgres(s) if s.sqlsHelper != nil { - err = s.sqlsHelper.CleanUp() + err := s.sqlsHelper.CleanUp() require.NoError(s.t, err) } }) diff --git a/flow/e2e/test_utils.go b/flow/e2e/test_utils.go index 3c103194f2..9baf09f13a 100644 --- a/flow/e2e/test_utils.go +++ b/flow/e2e/test_utils.go @@ -185,11 +185,11 @@ func SetupCDCFlowStatusQuery(t *testing.T, env *testsuite.TestWorkflowEnvironmen if err == nil { var state peerflow.CDCFlowWorkflowState err = response.Get(&state) - if err == nil { - if state.CurrentFlowState == protos.FlowStatus_STATUS_RUNNING { - return - } - } else { + if err == nil { + if state.CurrentFlowState == 
protos.FlowStatus_STATUS_RUNNING { + return + } + } else { slog.Error(err.Error()) } } else if counter > 15 { diff --git a/flow/e2eshared/e2eshared.go b/flow/e2eshared/e2eshared.go index 84ab661e1b..176564d342 100644 --- a/flow/e2eshared/e2eshared.go +++ b/flow/e2eshared/e2eshared.go @@ -12,10 +12,14 @@ import ( "github.com/jackc/pgx/v5/pgxpool" ) -type RowSource interface { +type Suite interface { T() *testing.T Pool() *pgxpool.Pool Suffix() string +} + +type RowSource interface { + Suite GetRows(table, cols string) (*model.QRecordBatch, error) } From afc93675a3935b96b6743c5571d8b22e03c2b3b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Thu, 11 Jan 2024 23:33:52 +0000 Subject: [PATCH 63/67] replace reviewdog action with official golangci action --- .github/workflows/flow.yml | 4 +-- .github/workflows/golang-lint.yml | 43 ++++++++++++++----------------- 2 files changed, 21 insertions(+), 26 deletions(-) diff --git a/.github/workflows/flow.yml b/.github/workflows/flow.yml index 3e3a4a3c77..b296cc6b7c 100644 --- a/.github/workflows/flow.yml +++ b/.github/workflows/flow.yml @@ -38,9 +38,9 @@ jobs: run: | ./generate_protos.sh - - uses: actions/setup-go@v4 + - uses: actions/setup-go@v5 with: - go-version: ">=1.21.0" + go-version: "1.21" cache-dependency-path: flow/go.sum - name: install gotestsum diff --git a/.github/workflows/golang-lint.yml b/.github/workflows/golang-lint.yml index 1216a69c3b..63cc061c77 100644 --- a/.github/workflows/golang-lint.yml +++ b/.github/workflows/golang-lint.yml @@ -5,35 +5,30 @@ on: branches: [main] paths: [flow/**] +permissions: + contents: read + jobs: - golangci-lint: - permissions: - checks: write - contents: read - pull-requests: write - strategy: - matrix: - runner: [ubicloud-standard-4-ubuntu-2204-arm] - runs-on: ${{ matrix.runner }} + golangci: + name: lint + runs-on: [ubicloud-standard-4-ubuntu-2204-arm] steps: - - name: checkout - uses: actions/checkout@v4 - with: - submodules: recursive - + - uses: 
actions/checkout@v4 - uses: bufbuild/buf-setup-action@v1.28.1 - - name: setup protos run: | ./generate_protos.sh - + - name: install lib-geos + run: | + sudo apt-get update + sudo apt-get install libgeos-dev + - uses: actions/setup-go@v5 + with: + go-version: "1.21" + cache: false - name: golangci-lint - uses: reviewdog/action-golangci-lint@v2 + uses: golangci/golangci-lint-action@v3 with: - workdir: ./flow - reporter: github-pr-review - github_token: ${{ secrets.GITHUB_TOKEN }} - golangci_lint_flags: "--timeout 10m" - fail_on_error: true - env: - REVIEWDOG_TOKEN: ${{ secrets.REVIEWDOG_TOKEN }} + version: v1.55 + working-directory: ./flow + args: --timeout=10m From f9f0d0beb5688280f4023ec23b94ae3e4cd14611 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Fri, 12 Jan 2024 00:55:05 +0000 Subject: [PATCH 64/67] Increase wait for margins, long term need to make heartbeats not be 2 minutes long --- flow/e2e/bigquery/peer_flow_bq_test.go | 6 +++--- flow/e2e/postgres/peer_flow_pg_test.go | 26 +++++++++++++------------- flow/e2e/test_utils.go | 2 +- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go index 6ff99c0d2f..4408572ca1 100644 --- a/flow/e2e/bigquery/peer_flow_bq_test.go +++ b/flow/e2e/bigquery/peer_flow_bq_test.go @@ -1446,7 +1446,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Basic() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize delete", func() bool { + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize delete", func() bool { pgRows, err := e2e.GetPgRows(s.pool, s.bqSuffix, srcName, "id,c1,c2,t") if err != nil { return false @@ -1621,7 +1621,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_UD_Same_Batch() { e2e.EnvNoError(s.t, env, err) e2e.EnvNoError(s.t, env, insertTx.Commit(context.Background())) - 
e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize transaction", func() bool { + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize transaction", func() bool { pgRows, err := e2e.GetPgRows(s.pool, s.bqSuffix, srcName, "id,c1,c2,t") e2e.EnvNoError(s.t, env, err) rows, err := s.GetRowsWhere(dstName, "id,c1,c2,t", "NOT _PEERDB_IS_DELETED") @@ -1698,7 +1698,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Insert_After_Delete() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize delete", func() bool { + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize delete", func() bool { pgRows, err := e2e.GetPgRows(s.pool, s.bqSuffix, tableName, "id,c1,c2,t") if err != nil { return false diff --git a/flow/e2e/postgres/peer_flow_pg_test.go b/flow/e2e/postgres/peer_flow_pg_test.go index 5f10ffee64..6d5a549854 100644 --- a/flow/e2e/postgres/peer_flow_pg_test.go +++ b/flow/e2e/postgres/peer_flow_pg_test.go @@ -56,7 +56,7 @@ func (s PeerFlowE2ETestSuitePG) WaitForSchema( expectedSchema *protos.TableSchema, ) { s.t.Helper() - e2e.EnvWaitFor(s.t, env, 2*time.Minute, reason, func() bool { + e2e.EnvWaitFor(s.t, env, 3*time.Minute, reason, func() bool { s.t.Helper() output, err := s.connector.GetTableSchema(&protos.GetTableSchemaBatchInput{ TableIdentifiers: []string{dstTableName}, @@ -359,7 +359,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_PG() { } s.t.Log("Inserted 10 rows into the source table") - e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize 10 rows", func() bool { + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize 10 rows", func() bool { return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil }) @@ -368,7 +368,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_PG() { e2e.EnvNoError(s.t, env, err) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) 
e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize modifications", func() bool { + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize modifications", func() bool { return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil }) env.CancelWorkflow() @@ -507,7 +507,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_Toast_2_PG() { } s.t.Log("Inserted 10 rows into the source table") - e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize 10 rows", func() bool { + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize 10 rows", func() bool { return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t,t2") == nil }) _, err = s.pool.Exec(context.Background(), @@ -516,7 +516,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_Toast_2_PG() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize update", func() bool { + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize update", func() bool { return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t,t2") == nil }) @@ -638,13 +638,13 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_Basic() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize row", func() bool { + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize row", func() bool { return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil }) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` UPDATE %s SET c1=c1+4 WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize update", func() bool { + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize update", func() bool { return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil }) 
// since we delete stuff, create another table to compare with @@ -655,7 +655,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_Basic() { DELETE FROM %s WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize delete", func() bool { + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize delete", func() bool { return s.comparePGTables(srcTableName, dstTableName+` WHERE NOT "_PEERDB_IS_DELETED"`, "id,c1,c2,t") == nil }) @@ -813,7 +813,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_UD_Same_Batch() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize row", func() bool { + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize row", func() bool { return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil }) @@ -830,7 +830,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_UD_Same_Batch() { e2e.EnvNoError(s.t, env, err) e2e.EnvNoError(s.t, env, insertTx.Commit(context.Background())) - e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize transaction", func() bool { + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize transaction", func() bool { return s.comparePGTables(srcTableName, dstTableName+` WHERE NOT "_PEERDB_IS_DELETED"`, "id,c1,c2,t") == nil }) @@ -900,19 +900,19 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_Insert_After_Delete() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize row", func() bool { + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize row", func() bool { return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil }) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) 
e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize delete", func() bool { + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize delete", func() bool { return s.comparePGTables(srcTableName, dstTableName+` WHERE NOT "_PEERDB_IS_DELETED"`, "id,c1,c2,t") == nil }) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(id,c1,c2,t) VALUES (1,3,4,random_string(10000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize reinsert", func() bool { + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize reinsert", func() bool { return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil }) diff --git a/flow/e2e/test_utils.go b/flow/e2e/test_utils.go index 9baf09f13a..cbb7bb52ed 100644 --- a/flow/e2e/test_utils.go +++ b/flow/e2e/test_utils.go @@ -150,7 +150,7 @@ func EnvWaitForEqualTablesWithNames( t := suite.T() t.Helper() - EnvWaitFor(t, env, 2*time.Minute, reason, func() bool { + EnvWaitFor(t, env, 3*time.Minute, reason, func() bool { t.Helper() suffix := suite.Suffix() From 7ad14d6c43fc1c4d5cdc57a43cf23ab5631a5877 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Fri, 12 Jan 2024 16:19:42 +0000 Subject: [PATCH 65/67] cleanup SetupCDCFlowStatusQuery control flow --- flow/e2e/test_utils.go | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/flow/e2e/test_utils.go b/flow/e2e/test_utils.go index cbb7bb52ed..c382e015fd 100644 --- a/flow/e2e/test_utils.go +++ b/flow/e2e/test_utils.go @@ -185,12 +185,10 @@ func SetupCDCFlowStatusQuery(t *testing.T, env *testsuite.TestWorkflowEnvironmen if err == nil { var state peerflow.CDCFlowWorkflowState err = response.Get(&state) - if err == nil { - if state.CurrentFlowState == protos.FlowStatus_STATUS_RUNNING { - return - } - } else { + if err != nil { slog.Error(err.Error()) + } else if state.CurrentFlowState == protos.FlowStatus_STATUS_RUNNING { + return } } else if counter > 15 { 
t.Error("UNEXPECTED SETUP CDC TIMEOUT", err.Error()) From 5b70e27504800693152a1ba044b57867e79af356 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Fri, 12 Jan 2024 16:53:08 +0000 Subject: [PATCH 66/67] bq is slow --- flow/e2e/bigquery/peer_flow_bq_test.go | 6 +++--- flow/e2e/test_utils.go | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go index 4408572ca1..a2d096a8f8 100644 --- a/flow/e2e/bigquery/peer_flow_bq_test.go +++ b/flow/e2e/bigquery/peer_flow_bq_test.go @@ -1446,7 +1446,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Basic() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize delete", func() bool { + e2e.EnvWaitFor(s.t, env, 5*time.Minute, "normalize delete", func() bool { pgRows, err := e2e.GetPgRows(s.pool, s.bqSuffix, srcName, "id,c1,c2,t") if err != nil { return false @@ -1621,7 +1621,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_UD_Same_Batch() { e2e.EnvNoError(s.t, env, err) e2e.EnvNoError(s.t, env, insertTx.Commit(context.Background())) - e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize transaction", func() bool { + e2e.EnvWaitFor(s.t, env, 5*time.Minute, "normalize transaction", func() bool { pgRows, err := e2e.GetPgRows(s.pool, s.bqSuffix, srcName, "id,c1,c2,t") e2e.EnvNoError(s.t, env, err) rows, err := s.GetRowsWhere(dstName, "id,c1,c2,t", "NOT _PEERDB_IS_DELETED") @@ -1698,7 +1698,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Insert_After_Delete() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize delete", func() bool { + e2e.EnvWaitFor(s.t, env, 5*time.Minute, "normalize delete", func() bool { pgRows, err := e2e.GetPgRows(s.pool, s.bqSuffix, 
tableName, "id,c1,c2,t") if err != nil { return false diff --git a/flow/e2e/test_utils.go b/flow/e2e/test_utils.go index c382e015fd..7703d142f5 100644 --- a/flow/e2e/test_utils.go +++ b/flow/e2e/test_utils.go @@ -150,7 +150,7 @@ func EnvWaitForEqualTablesWithNames( t := suite.T() t.Helper() - EnvWaitFor(t, env, 3*time.Minute, reason, func() bool { + EnvWaitFor(t, env, 4*time.Minute, reason, func() bool { t.Helper() suffix := suite.Suffix() From 00d00850098596d44cd45f5c56d0c2cf483a7685 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Fri, 12 Jan 2024 18:19:52 +0000 Subject: [PATCH 67/67] Theory: big query isn't slow, we're just getting hung up on caching --- flow/connectors/postgres/cdc.go | 2 +- flow/e2e/bigquery/bigquery_helper.go | 25 ++++++++++++++++--------- flow/e2e/bigquery/peer_flow_bq_test.go | 6 +++--- flow/e2e/test_utils.go | 6 +++--- 4 files changed, 23 insertions(+), 16 deletions(-) diff --git a/flow/connectors/postgres/cdc.go b/flow/connectors/postgres/cdc.go index fc6c665bda..4fd1b3dd79 100644 --- a/flow/connectors/postgres/cdc.go +++ b/flow/connectors/postgres/cdc.go @@ -263,8 +263,8 @@ func (p *PostgresCDCSource) consumeStream( if cdcRecordsStorage.Len() == 1 { records.SignalAsNotEmpty() - p.logger.Info(fmt.Sprintf("pushing the standby deadline to %s", time.Now().Add(standbyMessageTimeout))) nextStandbyMessageDeadline = time.Now().Add(standbyMessageTimeout) + p.logger.Info(fmt.Sprintf("pushing the standby deadline to %s", nextStandbyMessageDeadline)) } return nil } diff --git a/flow/e2e/bigquery/bigquery_helper.go b/flow/e2e/bigquery/bigquery_helper.go index 970960e0f8..267ff3d69d 100644 --- a/flow/e2e/bigquery/bigquery_helper.go +++ b/flow/e2e/bigquery/bigquery_helper.go @@ -155,7 +155,9 @@ func (b *BigQueryTestHelper) DropDataset(datasetName string) error { // RunCommand runs the given command. 
func (b *BigQueryTestHelper) RunCommand(command string) error { - _, err := b.client.Query(command).Read(context.Background()) + q := b.client.Query(command) + q.DisableQueryCache = true + _, err := q.Read(context.Background()) if err != nil { return fmt.Errorf("failed to run command: %w", err) } @@ -174,7 +176,9 @@ func (b *BigQueryTestHelper) countRowsWithDataset(dataset, tableName string, non command = fmt.Sprintf("SELECT COUNT(CASE WHEN " + nonNullCol + " IS NOT NULL THEN 1 END) AS non_null_count FROM `" + dataset + "." + tableName + "`;") } - it, err := b.client.Query(command).Read(context.Background()) + q := b.client.Query(command) + q.DisableQueryCache = true + it, err := q.Read(context.Background()) if err != nil { return 0, fmt.Errorf("failed to run command: %w", err) } @@ -302,7 +306,9 @@ func bqSchemaToQRecordSchema(schema bigquery.Schema) (*model.QRecordSchema, erro } func (b *BigQueryTestHelper) ExecuteAndProcessQuery(query string) (*model.QRecordBatch, error) { - it, err := b.client.Query(query).Read(context.Background()) + q := b.client.Query(query) + q.DisableQueryCache = true + it, err := q.Read(context.Background()) if err != nil { return nil, fmt.Errorf("failed to run command: %w", err) } @@ -355,10 +361,7 @@ func (b *BigQueryTestHelper) ExecuteAndProcessQuery(query string) (*model.QRecor }, nil } -/* -if the function errors or there are nulls, the function returns false -else true -*/ +// returns whether the function errors or there are nulls func (b *BigQueryTestHelper) CheckNull(tableName string, ColName []string) (bool, error) { if len(ColName) == 0 { return true, nil @@ -366,7 +369,9 @@ func (b *BigQueryTestHelper) CheckNull(tableName string, ColName []string) (bool joinedString := strings.Join(ColName, " is null or ") + " is null" command := fmt.Sprintf("SELECT COUNT(*) FROM `%s.%s` WHERE %s", b.Config.DatasetId, tableName, joinedString) - it, err := b.client.Query(command).Read(context.Background()) + q := b.client.Query(command) + 
q.DisableQueryCache = true + it, err := q.Read(context.Background()) if err != nil { return false, fmt.Errorf("failed to run command: %w", err) } @@ -398,7 +403,9 @@ func (b *BigQueryTestHelper) CheckDoubleValues(tableName string, ColName []strin csep := strings.Join(ColName, ",") command := fmt.Sprintf("SELECT %s FROM `%s.%s`", csep, b.Config.DatasetId, tableName) - it, err := b.client.Query(command).Read(context.Background()) + q := b.client.Query(command) + q.DisableQueryCache = true + it, err := q.Read(context.Background()) if err != nil { return false, fmt.Errorf("failed to run command: %w", err) } diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go index a2d096a8f8..4408572ca1 100644 --- a/flow/e2e/bigquery/peer_flow_bq_test.go +++ b/flow/e2e/bigquery/peer_flow_bq_test.go @@ -1446,7 +1446,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Basic() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, 5*time.Minute, "normalize delete", func() bool { + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize delete", func() bool { pgRows, err := e2e.GetPgRows(s.pool, s.bqSuffix, srcName, "id,c1,c2,t") if err != nil { return false @@ -1621,7 +1621,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_UD_Same_Batch() { e2e.EnvNoError(s.t, env, err) e2e.EnvNoError(s.t, env, insertTx.Commit(context.Background())) - e2e.EnvWaitFor(s.t, env, 5*time.Minute, "normalize transaction", func() bool { + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize transaction", func() bool { pgRows, err := e2e.GetPgRows(s.pool, s.bqSuffix, srcName, "id,c1,c2,t") e2e.EnvNoError(s.t, env, err) rows, err := s.GetRowsWhere(dstName, "id,c1,c2,t", "NOT _PEERDB_IS_DELETED") @@ -1698,7 +1698,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Insert_After_Delete() { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, 
srcTableName)) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, 5*time.Minute, "normalize delete", func() bool { + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize delete", func() bool { pgRows, err := e2e.GetPgRows(s.pool, s.bqSuffix, tableName, "id,c1,c2,t") if err != nil { return false diff --git a/flow/e2e/test_utils.go b/flow/e2e/test_utils.go index 7703d142f5..641fa594e5 100644 --- a/flow/e2e/test_utils.go +++ b/flow/e2e/test_utils.go @@ -150,7 +150,7 @@ func EnvWaitForEqualTablesWithNames( t := suite.T() t.Helper() - EnvWaitFor(t, env, 4*time.Minute, reason, func() bool { + EnvWaitFor(t, env, 3*time.Minute, reason, func() bool { t.Helper() suffix := suite.Suffix() @@ -548,12 +548,12 @@ func EnvEqualRecordBatches(t *testing.T, env *testsuite.TestWorkflowEnvironment, func EnvWaitFor(t *testing.T, env *testsuite.TestWorkflowEnvironment, timeout time.Duration, reason string, f func() bool) { t.Helper() + t.Log("WaitFor", reason, time.Now()) deadline := time.Now().Add(timeout) - t.Log("WaitFor", reason) for !f() { if time.Now().After(deadline) { - t.Error("UNEXPECTED TIMEOUT", reason) + t.Error("UNEXPECTED TIMEOUT", reason, time.Now()) env.CancelWorkflow() runtime.Goexit() }