From 449b86fb7c52fe2a93d07af048fed3d3cd1778b4 Mon Sep 17 00:00:00 2001
From: Kevin Biju <52661649+heavycrystal@users.noreply.github.com>
Date: Fri, 1 Mar 2024 02:26:01 +0530
Subject: [PATCH 01/13] moving to snapshot status when adding table (#1404)

Co-authored-by: Kaushik Iska
---
 flow/workflows/cdc_flow.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/flow/workflows/cdc_flow.go b/flow/workflows/cdc_flow.go
index 1d4f2bea30..c9825d4e80 100644
--- a/flow/workflows/cdc_flow.go
+++ b/flow/workflows/cdc_flow.go
@@ -157,6 +157,7 @@ func (w *CDCFlowWorkflowExecution) processCDCFlowConfigUpdate(ctx workflow.Conte
 			w.logger.Warn("duplicate source/destination tables found in additionalTables")
 			return nil
 		}
+		state.CurrentFlowStatus = protos.FlowStatus_STATUS_SNAPSHOT

 		alterPublicationAddAdditionalTablesCtx := workflow.WithActivityOptions(ctx, workflow.ActivityOptions{
 			StartToCloseTimeout: 5 * time.Minute,

From 904b513fcf6c251f7778813ec62905c195f0c9f8 Mon Sep 17 00:00:00 2001
From: Amogh Bharadwaj
Date: Fri, 1 Mar 2024 16:49:17 +0530
Subject: [PATCH 02/13] Datatypes: fix inet and cidr (#1412)

---
 flow/connectors/postgres/qvalue_convert.go | 11 ++++++++---
 flow/e2e/bigquery/peer_flow_bq_test.go     |  2 +-
 flow/e2e/test_utils.go                     | 10 ++++++++--
 flow/model/qrecord_batch.go                |  8 ++++++++
 flow/model/qvalue/avro_converter.go        |  2 +-
 flow/model/qvalue/qvalue.go                |  2 +-
 6 files changed, 27 insertions(+), 8 deletions(-)
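[Editor's note: a minimal sketch of the conversion this patch adds, for
context while reviewing. pgx v5 can hand INET/CIDR values to the connector
as netip.Prefix, and the new switch cases in the diff below store the
canonical text form. prefixToText is a hypothetical helper for illustration
only; nothing beyond the Go standard library is assumed.]

```go
package main

import (
	"fmt"
	"net/netip"
)

// prefixToText mirrors the patch's new netip.Prefix cases: pass a string
// through unchanged, render a netip.Prefix via String(), reject the rest.
func prefixToText(value interface{}) (string, error) {
	switch v := value.(type) {
	case string:
		return v, nil
	case netip.Prefix:
		return v.String(), nil // e.g. "1.1.10.2/32"
	default:
		return "", fmt.Errorf("failed to parse INET/CIDR: %v", v)
	}
}

func main() {
	out, err := prefixToText(netip.MustParsePrefix("1.1.10.2/32"))
	fmt.Println(out, err) // 1.1.10.2/32 <nil>
}
```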
diff --git a/flow/connectors/postgres/qvalue_convert.go b/flow/connectors/postgres/qvalue_convert.go
index d54e6fa4d3..defc2f492f 100644
--- a/flow/connectors/postgres/qvalue_convert.go
+++ b/flow/connectors/postgres/qvalue_convert.go
@@ -5,6 +5,7 @@ import (
 	"errors"
 	"fmt"
 	"math/big"
+	"net/netip"
 	"strings"
 	"time"

@@ -299,20 +300,24 @@ func parseFieldFromQValueKind(qvalueKind qvalue.QValueKind, value interface{}) (
 			return qvalue.QValue{}, fmt.Errorf("failed to parse UUID: %v", value)
 		}
 	case qvalue.QValueKindINET:
-		switch value.(type) {
+		switch v := value.(type) {
 		case string:
 			val = qvalue.QValue{Kind: qvalue.QValueKindINET, Value: value}
 		case [16]byte:
 			val = qvalue.QValue{Kind: qvalue.QValueKindINET, Value: value}
+		case netip.Prefix:
+			val = qvalue.QValue{Kind: qvalue.QValueKindINET, Value: v.String()}
 		default:
-			return qvalue.QValue{}, fmt.Errorf("failed to parse INET: %v", value)
+			return qvalue.QValue{}, fmt.Errorf("failed to parse INET: %v", v)
 		}
 	case qvalue.QValueKindCIDR:
-		switch value.(type) {
+		switch v := value.(type) {
 		case string:
 			val = qvalue.QValue{Kind: qvalue.QValueKindCIDR, Value: value}
 		case [16]byte:
 			val = qvalue.QValue{Kind: qvalue.QValueKindCIDR, Value: value}
+		case netip.Prefix:
+			val = qvalue.QValue{Kind: qvalue.QValueKindCIDR, Value: v.String()}
 		default:
 			return qvalue.QValue{}, fmt.Errorf("failed to parse CIDR: %v", value)
 		}
diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go
index cdf7f1d021..9d0b8f6a57 100644
--- a/flow/e2e/bigquery/peer_flow_bq_test.go
+++ b/flow/e2e/bigquery/peer_flow_bq_test.go
@@ -588,7 +588,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Types_BQ() {
 	_, err = s.Conn().Exec(context.Background(), fmt.Sprintf(`
 	INSERT INTO %s SELECT 2,2,b'1',b'101',
 	true,random_bytea(32),'s','test','1.1.10.2'::cidr,
 	CURRENT_DATE,1.23,1.234,'10.0.0.0/32'::inet,1,
 	'5 years 2 months 29 days 1 minute 2 seconds 200 milliseconds 20000 microseconds'::interval,
 	'{"sai":-8.02139037433155}'::json,'{"sai":1}'::jsonb,'08:00:2b:01:02:03'::macaddr,
 	1.2,1.23,4::oid,1.23,1,1,1,'test',now(),now(),now()::time,now()::timetz,
diff --git a/flow/e2e/test_utils.go b/flow/e2e/test_utils.go
index 36a0e4d7c9..598297106b 100644
--- a/flow/e2e/test_utils.go
+++ b/flow/e2e/test_utils.go
@@ -288,6 +288,8 @@ func CreateTableForQRep(conn *pgx.Conn, suffix string, tableName string) error {
 		"myreal REAL",
 		"myreal2 REAL",
 		"myreal3 REAL",
+		"myinet INET",
+		"mycidr CIDR",
 	}
 	tblFieldStr := strings.Join(tblFields, ",")
 	var pgErr *pgconn.PgError
@@ -351,7 +353,9 @@ func PopulateSourceTable(conn *pgx.Conn, suffix string, tableName string, rowCou
 					'NaN',
 					3.14159,
 					1,
-					1.0
+					1.0,
+					'10.0.0.0/32',
+					'1.1.10.2'::cidr
 			)`,
 			id, uuid.New().String(), uuid.New().String(),
 			uuid.New().String(), uuid.New().String(),
 			uuid.New().String(), uuid.New().String())
@@ -372,7 +376,9 @@ func PopulateSourceTable(conn *pgx.Conn, suffix string, tableName string, rowCou
 			nannu,
 			myreal,
 			myreal2,
-			myreal3
+			myreal3,
+			myinet,
+			mycidr
 	) VALUES %s;
 	`, suffix, tableName, strings.Join(rows, ",")))
 	if err != nil {
diff --git a/flow/model/qrecord_batch.go b/flow/model/qrecord_batch.go
index 77700b7a89..4cf4a11017 100644
--- a/flow/model/qrecord_batch.go
+++ b/flow/model/qrecord_batch.go
@@ -154,6 +154,14 @@ func (src *QRecordBatchCopyFromSource) Values() ([]interface{}, error) {
 			}
 			values[i] = v

+		case qvalue.QValueKindCIDR, qvalue.QValueKindINET:
+			v, ok := qValue.Value.(string)
+			if !ok {
+				src.err = errors.New("invalid INET/CIDR value")
+				return nil, src.err
+			}
+			values[i] = v
+
 		case qvalue.QValueKindTime:
 			t, ok := qValue.Value.(time.Time)
 			if !ok {
diff --git a/flow/model/qvalue/avro_converter.go b/flow/model/qvalue/avro_converter.go
index d5aef18113..ea90af00ed 100644
--- a/flow/model/qvalue/avro_converter.go
+++ b/flow/model/qvalue/avro_converter.go
@@ -58,7 +58,7 @@ type AvroSchemaField struct {
 // will return an error.
 func GetAvroSchemaFromQValueKind(kind QValueKind, targetDWH QDWHType, precision int16, scale int16) (interface{}, error) {
 	switch kind {
-	case QValueKindString, QValueKindQChar:
+	case QValueKindString, QValueKindQChar, QValueKindCIDR, QValueKindINET:
 		return "string", nil
 	case QValueKindUUID:
 		return AvroSchemaLogical{
diff --git a/flow/model/qvalue/qvalue.go b/flow/model/qvalue/qvalue.go
index 1ad07150b6..972063d494 100644
--- a/flow/model/qvalue/qvalue.go
+++ b/flow/model/qvalue/qvalue.go
@@ -56,7 +56,7 @@ func (q QValue) Equals(other QValue) bool {
 		} else {
 			return false
 		}
-	case QValueKindString:
+	case QValueKindString, QValueKindINET, QValueKindCIDR:
 		return compareString(q.Value, other.Value)
 	// all internally represented as a Golang time.Time
 	case QValueKindDate,

From 84194d9dbf93757dfbe8b876b79f003130cfab7e Mon Sep 17 00:00:00 2001
From: Amogh Bharadwaj
Date: Fri, 1 Mar 2024 22:25:55 +0530
Subject: [PATCH 03/13] Bigquery: Support JSON, FLOAT PKey For Merge (#1415)

Float, JSON, and other [non-groupable types in BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#groupable_data_types)
are not supported as primary keys by our merge statement, because BigQuery
cannot compare such values during the primary key comparison. For JSON,
for example:

```
googleapi: Error 400: Equality is not defined for arguments of type JSON
```

This PR takes a step towards supporting such columns as primary keys for
BigQuery merge by transforming them to strings for `PARTITION BY` and for
the comparison.

Test added
---
 .../bigquery/merge_stmt_generator.go   | 53 ++++++++++++++-----
 flow/e2e/bigquery/peer_flow_bq_test.go | 53 +++++++++++++++++++
 2 files changed, 94 insertions(+), 12 deletions(-)
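[Editor's note: a small, self-contained sketch of the two SQL shapes that
transformedPkeyStrings produces for a JSON primary key. pkeyExprs is a
hypothetical stand-in for the generator method in the diff below, shown
only to make the PARTITION BY vs. comparison split concrete.]

```go
package main

import "fmt"

// pkeyExprs shows the two forms generated for a JSON primary key column:
// a groupable string for PARTITION BY, and a string-to-string equality
// check for the MERGE ... ON clause, since BigQuery cannot compare JSON.
func pkeyExprs(col, short string) (partitionExpr, compareExpr string) {
	partitionExpr = fmt.Sprintf("TO_JSON_STRING(%s)", short)
	compareExpr = fmt.Sprintf("TO_JSON_STRING(_t.`%s`)=TO_JSON_STRING(_d.%s)", col, short)
	return
}

func main() {
	p, c := pkeyExprs("j", "_c1")
	fmt.Println(p) // TO_JSON_STRING(_c1)
	fmt.Println(c) // TO_JSON_STRING(_t.`j`)=TO_JSON_STRING(_d._c1)
}
```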
diff --git a/flow/connectors/bigquery/merge_stmt_generator.go b/flow/connectors/bigquery/merge_stmt_generator.go
index eb8ebb6177..59a269a092 100644
--- a/flow/connectors/bigquery/merge_stmt_generator.go
+++ b/flow/connectors/bigquery/merge_stmt_generator.go
@@ -100,6 +100,44 @@ func (m *mergeStmtGenerator) generateFlattenedCTE() string {
 		m.syncBatchID, m.dstTableName)
 }

+// This function is to support datatypes like JSON which cannot be partitioned by or compared by BigQuery
+func (m *mergeStmtGenerator) transformedPkeyStrings(forPartition bool) []string {
+	pkeys := make([]string, 0, len(m.normalizedTableSchema.PrimaryKeyColumns))
+	columnNameTypeMap := make(map[string]qvalue.QValueKind, len(m.normalizedTableSchema.Columns))
+	for _, col := range m.normalizedTableSchema.Columns {
+		columnNameTypeMap[col.Name] = qvalue.QValueKind(col.Type)
+	}
+
+	for _, pkeyCol := range m.normalizedTableSchema.PrimaryKeyColumns {
+		pkeyColType, ok := columnNameTypeMap[pkeyCol]
+		if !ok {
+			continue
+		}
+		switch pkeyColType {
+		case qvalue.QValueKindJSON:
+			if forPartition {
+				pkeys = append(pkeys, fmt.Sprintf("TO_JSON_STRING(%s)", m.shortColumn[pkeyCol]))
+			} else {
+				pkeys = append(pkeys, fmt.Sprintf("TO_JSON_STRING(_t.`%s`)=TO_JSON_STRING(_d.%s)",
+					pkeyCol, m.shortColumn[pkeyCol]))
+			}
+		case qvalue.QValueKindFloat32, qvalue.QValueKindFloat64:
+			if forPartition {
+				pkeys = append(pkeys, fmt.Sprintf("CAST(%s as STRING)", m.shortColumn[pkeyCol]))
+			} else {
+				pkeys = append(pkeys, fmt.Sprintf("_t.`%s`=_d.%s", pkeyCol, m.shortColumn[pkeyCol]))
+			}
+		default:
+			if forPartition {
+				pkeys = append(pkeys, m.shortColumn[pkeyCol])
+			} else {
+				pkeys = append(pkeys, fmt.Sprintf("_t.`%s`=_d.%s", pkeyCol, m.shortColumn[pkeyCol]))
+			}
+		}
+	}
+	return pkeys
+}
+
 // generateDeDupedCTE generates a de-duped CTE.
 func (m *mergeStmtGenerator) generateDeDupedCTE() string {
 	const cte = `_dd AS (
@@ -111,13 +149,8 @@ func (m *mergeStmtGenerator) generateDeDupedCTE() string {
 		WHERE _peerdb_rank=1
 	) SELECT * FROM _dd`

-	shortPkeys := make([]string, 0, len(m.normalizedTableSchema.PrimaryKeyColumns))
-	for _, pkeyCol := range m.normalizedTableSchema.PrimaryKeyColumns {
-		shortPkeys = append(shortPkeys, m.shortColumn[pkeyCol])
-	}
-
-	pkeyColsStr := fmt.Sprintf("(CONCAT(%s))", strings.Join(shortPkeys,
-		", '_peerdb_concat_', "))
+	shortPkeys := m.transformedPkeyStrings(true)
+	pkeyColsStr := strings.Join(shortPkeys, ",")

 	return fmt.Sprintf(cte, pkeyColsStr)
 }
@@ -151,11 +184,7 @@ func (m *mergeStmtGenerator) generateMergeStmt(unchangedToastColumns []string) s
 	}
 	updateStringToastCols := strings.Join(updateStatementsforToastCols, " ")

-	pkeySelectSQLArray := make([]string, 0, len(m.normalizedTableSchema.PrimaryKeyColumns))
-	for _, pkeyColName := range m.normalizedTableSchema.PrimaryKeyColumns {
-		pkeySelectSQLArray = append(pkeySelectSQLArray, fmt.Sprintf("_t.%s=_d.%s",
-			pkeyColName, m.shortColumn[pkeyColName]))
-	}
+	pkeySelectSQLArray := m.transformedPkeyStrings(false)
 	// t. = d. AND t. = d. ...
 	pkeySelectSQL := strings.Join(pkeySelectSQLArray, " AND ")
diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go
index 9d0b8f6a57..74a1949ff7 100644
--- a/flow/e2e/bigquery/peer_flow_bq_test.go
+++ b/flow/e2e/bigquery/peer_flow_bq_test.go
@@ -1514,3 +1514,56 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Insert_After_Delete() {
 	require.NoError(s.t, err)
 	require.Equal(s.t, int64(0), numNewRows)
 }
+
+func (s PeerFlowE2ETestSuiteBQ) Test_JSON_PKey_BQ() {
+	env := e2e.NewTemporalTestWorkflowEnvironment(s.t)
+
+	srcTableName := s.attachSchemaSuffix("test_json_pkey_bq")
+	dstTableName := "test_json_pkey_bq"
+
+	_, err := s.Conn().Exec(context.Background(), fmt.Sprintf(`
+		CREATE TABLE IF NOT EXISTS %s (
+			id SERIAL NOT NULL,
+			j JSON NOT NULL,
+			key TEXT NOT NULL,
+			value TEXT NOT NULL
+		);
+	`, srcTableName))
+	require.NoError(s.t, err)
+
+	_, err = s.Conn().Exec(context.Background(), fmt.Sprintf(`
+		ALTER TABLE %s REPLICA IDENTITY FULL
+	`, srcTableName))
+	require.NoError(s.t, err)
+
+	connectionGen := e2e.FlowConnectionGenerationConfig{
+		FlowJobName:      s.attachSuffix("test_json_pkey_flow"),
+		TableNameMapping: map[string]string{srcTableName: dstTableName},
+		Destination:      s.bqHelper.Peer,
+		CdcStagingPath:   "",
+	}
+
+	flowConnConfig := connectionGen.GenerateFlowConnectionConfigs()
+	flowConnConfig.MaxBatchSize = 100
+
+	go func() {
+		e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen)
+		// insert 10 rows into the source table
+		for i := range 10 {
+			testKey := fmt.Sprintf("test_key_%d", i)
+			testValue := fmt.Sprintf("test_value_%d", i)
+			testJson := `'{"name":"jack", "age":12, "spouse":null}'::json`
+			_, err = s.Conn().Exec(context.Background(), fmt.Sprintf(`
+			INSERT INTO %s(key, value, j) VALUES ($1, $2, %s)
+		`, srcTableName, testJson), testKey, testValue)
+			e2e.EnvNoError(s.t, env, err)
+		}
+		s.t.Log("Inserted 10 rows into the source table")
+
+		e2e.EnvWaitForEqualTables(env, s, "normalize inserts", dstTableName, "id,key,value,j")
+		env.CancelWorkflow()
+	}()
+
+	env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil)
+	e2e.RequireEnvCanceled(s.t, env)
+}

From 3f3ff9bb99b9f78c691dd23a87fc7dc1c55a1321 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philip=20Dub=C3=A9?=
Date: Fri, 1 Mar 2024 19:05:44 +0000
Subject: [PATCH 04/13] Fix regressed signals test (#1417)

Pause is now a hard pause, so don't test that pause includes a final sync.
This part of the test was a bit racy anyway.

---------

Co-authored-by: Amogh Bharadwaj
---
 flow/e2e/postgres/peer_flow_pg_test.go | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/flow/e2e/postgres/peer_flow_pg_test.go b/flow/e2e/postgres/peer_flow_pg_test.go
index 31db45788d..a57613edda 100644
--- a/flow/e2e/postgres/peer_flow_pg_test.go
+++ b/flow/e2e/postgres/peer_flow_pg_test.go
@@ -1215,9 +1215,6 @@ func (s PeerFlowE2ETestSuitePG) Test_Dynamic_Mirror_Config_Via_Signals() {
 	if !s.t.Failed() {
 		// wait for first RegisterDelayedCallback to hit.
 		e2e.EnvWaitFor(s.t, env, 1*time.Minute, "sent pause signal", func() bool {
-			// adding 1 more row while pausing - guarantee finishing another sync
-			addRows(1)
-
 			return sentPause
 		})
 	} else {
@@ -1258,10 +1255,6 @@ func (s PeerFlowE2ETestSuitePG) Test_Dynamic_Mirror_Config_Via_Signals() {
 	}, 56*time.Second)

 	go func() {
-		e2e.EnvWaitFor(s.t, env, 1*time.Minute, "normalize 1 record - first table", func() bool {
-			return s.comparePGTables(srcTable1Name, dstTable1Name, "id,t") == nil
-		})
-
 		// we have a paused mirror, wait for second signal to hit.
 		e2e.EnvWaitFor(s.t, env, 1*time.Minute, "sent updates signal", func() bool {
 			return sentUpdate
@@ -1276,9 +1269,13 @@ func (s PeerFlowE2ETestSuitePG) Test_Dynamic_Mirror_Config_Via_Signals() {
 		e2e.EnvWaitFor(s.t, env, 1*time.Minute, "normalize 18 records - first table", func() bool {
 			return s.comparePGTables(srcTable1Name, dstTable1Name, "id,t") == nil
 		})
-		e2e.EnvWaitFor(s.t, env, 1*time.Minute, "initial load + normalize 18 records - second table", func() bool {
-			return s.comparePGTables(srcTable2Name, dstTable2Name, "id,t") == nil
+		/* TODO fix in integration tests
+		e2e.EnvWaitFor(s.t, env, 2*time.Minute, "initial load + normalize 18 records - second table", func() bool {
+			err := s.comparePGTables(srcTable2Name, dstTable2Name, "id,t")
+			s.t.Log("TEST", err)
+			return err == nil
 		})
+		*/

 		workflowState = getWorkflowState()
 		assert.EqualValues(s.t, 14, workflowState.SyncFlowOptions.IdleTimeoutSeconds)
@@ -1287,9 +1284,8 @@ func (s PeerFlowE2ETestSuitePG) Test_Dynamic_Mirror_Config_Via_Signals() {
 		assert.Len(s.t, workflowState.SyncFlowOptions.SrcTableIdNameMapping, 2)
 		assert.Len(s.t, workflowState.SyncFlowOptions.TableNameSchemaMapping, 2)
 		// 3 from first insert of 18 rows in 1 table
-		// 1 from pre-pause
-		// 3 from second insert of 18 rows in 2 tables, batch size updated
-		assert.GreaterOrEqual(s.t, len(workflowState.SyncFlowStatuses), 3+1+3)
+		// TODO 3 from second insert of 18 rows in 2 tables, batch size updated
+		assert.GreaterOrEqual(s.t, len(workflowState.SyncFlowStatuses), 4)

 		env.CancelWorkflow()
 	}()

From 54ad0ef044f553bffe60b2e72c606ad332b2f48e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philip=20Dub=C3=A9?=
Date: Fri, 1 Mar 2024 20:42:25 +0000
Subject: [PATCH 05/13] Remove integer TaskQueueID (#1419)

Removes the need for an error check.

Also, previously both task queues had the same iota value, so that was wrong.
---
 flow/cmd/api.go                 |  6 +-----
 flow/cmd/snapshot_worker.go     |  6 +-----
 flow/cmd/worker.go              |  6 +-----
 flow/shared/constants.go        | 37 +++++++++------------------------
 flow/workflows/cdc_flow.go      |  6 +-----
 flow/workflows/snapshot_flow.go |  6 +-----
 6 files changed, 15 insertions(+), 52 deletions(-)
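[Editor's note: a self-contained sketch of the string-typed task queue
naming this patch introduces. deploymentUID is passed as a parameter here
instead of being read via peerdbenv.PeerDBDeploymentUID(), purely so the
example runs on its own.]

```go
package main

import "fmt"

type TaskQueueID string

const (
	PeerFlowTaskQueue     TaskQueueID = "peer-flow-task-queue"
	SnapshotFlowTaskQueue TaskQueueID = "snapshot-flow-task-queue"
)

// With string-typed IDs the old lookup switch (and its error path)
// disappears: the deployment UID, if any, is simply prefixed.
func GetPeerFlowTaskQueueName(id TaskQueueID, deploymentUID string) string {
	if deploymentUID == "" {
		return string(id)
	}
	return fmt.Sprintf("%s-%s", deploymentUID, id)
}

func main() {
	fmt.Println(GetPeerFlowTaskQueueName(PeerFlowTaskQueue, ""))       // peer-flow-task-queue
	fmt.Println(GetPeerFlowTaskQueueName(SnapshotFlowTaskQueue, "d1")) // d1-snapshot-flow-task-queue
}
```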
return fmt.Errorf("unable to get catalog connection pool: %w", err) } - taskQueue, err := shared.GetPeerFlowTaskQueueName(shared.PeerFlowTaskQueueID) - if err != nil { - return err - } - + taskQueue := shared.GetPeerFlowTaskQueueName(shared.PeerFlowTaskQueue) flowHandler := NewFlowRequestHandler(tc, catalogConn, taskQueue) err = killExistingScheduleFlows(ctx, tc, args.TemporalNamespace, taskQueue) diff --git a/flow/cmd/snapshot_worker.go b/flow/cmd/snapshot_worker.go index bc53785382..89680f51d9 100644 --- a/flow/cmd/snapshot_worker.go +++ b/flow/cmd/snapshot_worker.go @@ -53,11 +53,7 @@ func SnapshotWorkerMain(opts *SnapshotWorkerOptions) error { } defer c.Close() - taskQueue, queueErr := shared.GetPeerFlowTaskQueueName(shared.SnapshotFlowTaskQueueID) - if queueErr != nil { - return queueErr - } - + taskQueue := shared.GetPeerFlowTaskQueueName(shared.SnapshotFlowTaskQueue) w := worker.New(c, taskQueue, worker.Options{ EnableSessionWorker: true, }) diff --git a/flow/cmd/worker.go b/flow/cmd/worker.go index ee9218a9da..c43515dc98 100644 --- a/flow/cmd/worker.go +++ b/flow/cmd/worker.go @@ -123,11 +123,7 @@ func WorkerMain(opts *WorkerOptions) error { slog.Info("Created temporal client") defer c.Close() - taskQueue, queueErr := shared.GetPeerFlowTaskQueueName(shared.PeerFlowTaskQueueID) - if queueErr != nil { - return queueErr - } - + taskQueue := shared.GetPeerFlowTaskQueueName(shared.PeerFlowTaskQueue) w := worker.New(c, taskQueue, worker.Options{ EnableSessionWorker: true, }) diff --git a/flow/shared/constants.go b/flow/shared/constants.go index fe8320b446..e6e982c321 100644 --- a/flow/shared/constants.go +++ b/flow/shared/constants.go @@ -6,10 +6,15 @@ import ( "github.com/PeerDB-io/peer-flow/peerdbenv" ) +type ( + ContextKey string + TaskQueueID string +) + const ( // Task Queues - peerFlowTaskQueue = "peer-flow-task-queue" - snapshotFlowTaskQueue = "snapshot-flow-task-queue" + PeerFlowTaskQueue TaskQueueID = "peer-flow-task-queue" + SnapshotFlowTaskQueue TaskQueueID = "snapshot-flow-task-queue" // Queries CDCFlowStateQuery = "q-cdc-flow-state" @@ -22,42 +27,20 @@ const ( const MirrorNameSearchAttribute = "MirrorName" -type ( - ContextKey string -) - const ( FlowNameKey ContextKey = "flowName" PartitionIDKey ContextKey = "partitionId" DeploymentUIDKey ContextKey = "deploymentUid" ) -type TaskQueueID int64 - -const ( - PeerFlowTaskQueueID TaskQueueID = iota - SnapshotFlowTaskQueueID TaskQueueID = iota -) - const FetchAndChannelSize = 256 * 1024 -func GetPeerFlowTaskQueueName(taskQueueID TaskQueueID) (string, error) { - switch taskQueueID { - case PeerFlowTaskQueueID: - return prependUIDToTaskQueueName(peerFlowTaskQueue), nil - case SnapshotFlowTaskQueueID: - return prependUIDToTaskQueueName(snapshotFlowTaskQueue), nil - default: - return "", fmt.Errorf("unknown task queue id %d", taskQueueID) - } -} - -func prependUIDToTaskQueueName(taskQueueName string) string { +func GetPeerFlowTaskQueueName(taskQueueID TaskQueueID) string { deploymentUID := peerdbenv.PeerDBDeploymentUID() if deploymentUID == "" { - return taskQueueName + return string(taskQueueID) } - return fmt.Sprintf("%s-%s", deploymentUID, taskQueueName) + return fmt.Sprintf("%s-%s", deploymentUID, taskQueueID) } func GetDeploymentUID() string { diff --git a/flow/workflows/cdc_flow.go b/flow/workflows/cdc_flow.go index c9825d4e80..e0629d5233 100644 --- a/flow/workflows/cdc_flow.go +++ b/flow/workflows/cdc_flow.go @@ -371,11 +371,7 @@ func CDCFlowWorkflow( // next part of the setup is to snapshot-initial-copy and setup replication 
slots. snapshotFlowID := GetChildWorkflowID("snapshot-flow", cfg.FlowJobName, originalRunID) - taskQueue, err := shared.GetPeerFlowTaskQueueName(shared.SnapshotFlowTaskQueueID) - if err != nil { - return state, err - } - + taskQueue := shared.GetPeerFlowTaskQueueName(shared.SnapshotFlowTaskQueue) childSnapshotFlowOpts := workflow.ChildWorkflowOptions{ WorkflowID: snapshotFlowID, ParentClosePolicy: enums.PARENT_CLOSE_POLICY_REQUEST_CANCEL, diff --git a/flow/workflows/snapshot_flow.go b/flow/workflows/snapshot_flow.go index fd8f539083..a42110f769 100644 --- a/flow/workflows/snapshot_flow.go +++ b/flow/workflows/snapshot_flow.go @@ -105,11 +105,7 @@ func (s *SnapshotFlowExecution) cloneTable( s.logger.Info(fmt.Sprintf("Obtained child id %s for source table %s and destination table %s", childWorkflowID, srcName, dstName), cloneLog) - taskQueue, queueErr := shared.GetPeerFlowTaskQueueName(shared.PeerFlowTaskQueueID) - if queueErr != nil { - return queueErr - } - + taskQueue := shared.GetPeerFlowTaskQueueName(shared.PeerFlowTaskQueue) childCtx := workflow.WithChildOptions(ctx, workflow.ChildWorkflowOptions{ WorkflowID: childWorkflowID, WorkflowTaskTimeout: 5 * time.Minute, From 03f21b3b3237486fd099b47c47f2b26276f70b45 Mon Sep 17 00:00:00 2001 From: Kaushik Iska Date: Fri, 1 Mar 2024 16:02:14 -0500 Subject: [PATCH 06/13] Fix column exclusion (initial-load) (#1420) --- flow/workflows/cdc_flow.go | 7 ++++++- flow/workflows/snapshot_flow.go | 11 ++++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/flow/workflows/cdc_flow.go b/flow/workflows/cdc_flow.go index e0629d5233..1891d7f6c1 100644 --- a/flow/workflows/cdc_flow.go +++ b/flow/workflows/cdc_flow.go @@ -383,7 +383,12 @@ func CDCFlowWorkflow( WaitForCancellation: true, } snapshotFlowCtx := workflow.WithChildOptions(ctx, childSnapshotFlowOpts) - snapshotFlowFuture := workflow.ExecuteChildWorkflow(snapshotFlowCtx, SnapshotFlowWorkflow, cfg) + snapshotFlowFuture := workflow.ExecuteChildWorkflow( + snapshotFlowCtx, + SnapshotFlowWorkflow, + cfg, + state.SyncFlowOptions.TableNameSchemaMapping, + ) if err := snapshotFlowFuture.Get(snapshotFlowCtx, nil); err != nil { w.logger.Error("snapshot flow failed", slog.Any("error", err)) return state, fmt.Errorf("failed to execute snapshot workflow: %w", err) diff --git a/flow/workflows/snapshot_flow.go b/flow/workflows/snapshot_flow.go index a42110f769..050bc604e5 100644 --- a/flow/workflows/snapshot_flow.go +++ b/flow/workflows/snapshot_flow.go @@ -238,10 +238,15 @@ func (s *SnapshotFlowExecution) cloneTablesWithSlot( return nil } -func SnapshotFlowWorkflow(ctx workflow.Context, config *protos.FlowConnectionConfigs) error { +func SnapshotFlowWorkflow( + ctx workflow.Context, + config *protos.FlowConnectionConfigs, + tableNameSchemaMapping map[string]*protos.TableSchema, +) error { se := &SnapshotFlowExecution{ - config: config, - logger: log.With(workflow.GetLogger(ctx), slog.String(string(shared.FlowNameKey), config.FlowJobName)), + config: config, + tableNameSchemaMapping: tableNameSchemaMapping, + logger: log.With(workflow.GetLogger(ctx), slog.String(string(shared.FlowNameKey), config.FlowJobName)), } numTablesInParallel := int(max(config.SnapshotNumTablesInParallel, 1)) From 0ac9008809e31acfd2dd49fa3bb55938539db3bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Sun, 3 Mar 2024 17:49:40 +0000 Subject: [PATCH 07/13] Move utils.Array into shared (#1423) shared should not depend on any other internal packages in order to avoid import cycles Also move shared/alerting to 
its own top directory Ran into this while attempting to introduce a `Default[T]()` to shared to turn on exhaustruct lint --- flow/activities/flowable.go | 2 +- flow/activities/snapshot_activity.go | 2 +- flow/{shared => }/alerting/alerting.go | 0 flow/{shared => }/alerting/slack_alert_sender.go | 0 flow/cmd/snapshot_worker.go | 2 +- flow/cmd/worker.go | 2 +- flow/connectors/bigquery/bigquery.go | 2 +- flow/connectors/bigquery/merge_stmt_generator.go | 4 ++-- flow/connectors/core.go | 2 +- flow/connectors/postgres/normalize_stmt_generator.go | 3 ++- flow/connectors/postgres/postgres.go | 5 +++-- flow/connectors/postgres/qvalue_convert.go | 4 ++-- flow/connectors/snowflake/merge_stmt_generator.go | 3 ++- flow/e2e/test_utils.go | 2 +- flow/shared/additional_tables.go | 5 ++--- flow/{connectors/utils => shared}/array.go | 2 +- 16 files changed, 21 insertions(+), 19 deletions(-) rename flow/{shared => }/alerting/alerting.go (100%) rename flow/{shared => }/alerting/slack_alert_sender.go (100%) rename flow/{connectors/utils => shared}/array.go (98%) diff --git a/flow/activities/flowable.go b/flow/activities/flowable.go index 9fa859b5b4..1d6433c8f4 100644 --- a/flow/activities/flowable.go +++ b/flow/activities/flowable.go @@ -16,6 +16,7 @@ import ( "golang.org/x/sync/errgroup" "google.golang.org/protobuf/proto" + "github.com/PeerDB-io/peer-flow/alerting" "github.com/PeerDB-io/peer-flow/connectors" connbigquery "github.com/PeerDB-io/peer-flow/connectors/bigquery" connpostgres "github.com/PeerDB-io/peer-flow/connectors/postgres" @@ -26,7 +27,6 @@ import ( "github.com/PeerDB-io/peer-flow/model" "github.com/PeerDB-io/peer-flow/peerdbenv" "github.com/PeerDB-io/peer-flow/shared" - "github.com/PeerDB-io/peer-flow/shared/alerting" ) // CheckConnectionResult is the result of a CheckConnection call. 
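[Editor's note: the patch only moves array.go, so ArrayMinus's body is not
shown in the diffs below; this sketch is a plausible implementation
consistent with the "first - second" comment and the call sites, not the
verified original.]

```go
package shared

// ArrayMinus returns the elements of first that do not appear in second,
// i.e. "first - second", using a set built from second for fast lookups.
func ArrayMinus[T comparable](first, second []T) []T {
	exclude := make(map[T]struct{}, len(second))
	for _, v := range second {
		exclude[v] = struct{}{}
	}
	result := make([]T, 0, len(first))
	for _, v := range first {
		if _, ok := exclude[v]; !ok {
			result = append(result, v)
		}
	}
	return result
}
```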
diff --git a/flow/activities/flowable.go b/flow/activities/flowable.go
index 9fa859b5b4..1d6433c8f4 100644
--- a/flow/activities/flowable.go
+++ b/flow/activities/flowable.go
@@ -16,6 +16,7 @@ import (
 	"golang.org/x/sync/errgroup"
 	"google.golang.org/protobuf/proto"

+	"github.com/PeerDB-io/peer-flow/alerting"
 	"github.com/PeerDB-io/peer-flow/connectors"
 	connbigquery "github.com/PeerDB-io/peer-flow/connectors/bigquery"
 	connpostgres "github.com/PeerDB-io/peer-flow/connectors/postgres"
@@ -26,7 +27,6 @@ import (
 	"github.com/PeerDB-io/peer-flow/model"
 	"github.com/PeerDB-io/peer-flow/peerdbenv"
 	"github.com/PeerDB-io/peer-flow/shared"
-	"github.com/PeerDB-io/peer-flow/shared/alerting"
 )

 // CheckConnectionResult is the result of a CheckConnection call.
diff --git a/flow/activities/snapshot_activity.go b/flow/activities/snapshot_activity.go
index 0e75ea2e06..6ee2bb5a8b 100644
--- a/flow/activities/snapshot_activity.go
+++ b/flow/activities/snapshot_activity.go
@@ -9,11 +9,11 @@ import (

 	"go.temporal.io/sdk/activity"

+	"github.com/PeerDB-io/peer-flow/alerting"
 	"github.com/PeerDB-io/peer-flow/connectors"
 	connpostgres "github.com/PeerDB-io/peer-flow/connectors/postgres"
 	"github.com/PeerDB-io/peer-flow/generated/protos"
 	"github.com/PeerDB-io/peer-flow/shared"
-	"github.com/PeerDB-io/peer-flow/shared/alerting"
 )

 type SnapshotActivity struct {
diff --git a/flow/shared/alerting/alerting.go b/flow/alerting/alerting.go
similarity index 100%
rename from flow/shared/alerting/alerting.go
rename to flow/alerting/alerting.go
diff --git a/flow/shared/alerting/slack_alert_sender.go b/flow/alerting/slack_alert_sender.go
similarity index 100%
rename from flow/shared/alerting/slack_alert_sender.go
rename to flow/alerting/slack_alert_sender.go
diff --git a/flow/cmd/snapshot_worker.go b/flow/cmd/snapshot_worker.go
index 89680f51d9..07cf5a19dc 100644
--- a/flow/cmd/snapshot_worker.go
+++ b/flow/cmd/snapshot_worker.go
@@ -11,10 +11,10 @@ import (
 	"go.temporal.io/sdk/worker"

 	"github.com/PeerDB-io/peer-flow/activities"
+	"github.com/PeerDB-io/peer-flow/alerting"
 	utils "github.com/PeerDB-io/peer-flow/connectors/utils/catalog"
 	"github.com/PeerDB-io/peer-flow/logger"
 	"github.com/PeerDB-io/peer-flow/shared"
-	"github.com/PeerDB-io/peer-flow/shared/alerting"
 	peerflow "github.com/PeerDB-io/peer-flow/workflows"
 )
diff --git a/flow/cmd/worker.go b/flow/cmd/worker.go
index c43515dc98..476a274de6 100644
--- a/flow/cmd/worker.go
+++ b/flow/cmd/worker.go
@@ -16,11 +16,11 @@ import (
 	"go.temporal.io/sdk/worker"

 	"github.com/PeerDB-io/peer-flow/activities"
+	"github.com/PeerDB-io/peer-flow/alerting"
 	"github.com/PeerDB-io/peer-flow/connectors"
 	utils "github.com/PeerDB-io/peer-flow/connectors/utils/catalog"
 	"github.com/PeerDB-io/peer-flow/logger"
 	"github.com/PeerDB-io/peer-flow/shared"
-	"github.com/PeerDB-io/peer-flow/shared/alerting"
 	peerflow "github.com/PeerDB-io/peer-flow/workflows"
 )
diff --git a/flow/connectors/bigquery/bigquery.go b/flow/connectors/bigquery/bigquery.go
index 5d3bf1e679..5963cedc10 100644
--- a/flow/connectors/bigquery/bigquery.go
+++ b/flow/connectors/bigquery/bigquery.go
@@ -537,7 +537,7 @@ func (c *BigQueryConnector) NormalizeRecords(ctx context.Context, req *model.Nor
 	// doesn't exceed the limit. We should make this configurable.
 	const batchSize = 8
 	stmtNum := 0
-	err = utils.ArrayIterChunks(unchangedToastColumns, batchSize, func(chunk []string) error {
+	err = shared.ArrayIterChunks(unchangedToastColumns, batchSize, func(chunk []string) error {
 		stmtNum += 1
 		mergeStmt := mergeGen.generateMergeStmt(chunk)
 		c.logger.Info(fmt.Sprintf("running merge statement %d for table %s..",
diff --git a/flow/connectors/bigquery/merge_stmt_generator.go b/flow/connectors/bigquery/merge_stmt_generator.go
index 59a269a092..e7810f6a8f 100644
--- a/flow/connectors/bigquery/merge_stmt_generator.go
+++ b/flow/connectors/bigquery/merge_stmt_generator.go
@@ -6,9 +6,9 @@ import (

 	"cloud.google.com/go/bigquery"

-	"github.com/PeerDB-io/peer-flow/connectors/utils"
 	"github.com/PeerDB-io/peer-flow/generated/protos"
 	"github.com/PeerDB-io/peer-flow/model/qvalue"
+	"github.com/PeerDB-io/peer-flow/shared"
 )

 type mergeStmtGenerator struct {
@@ -233,7 +233,7 @@ func (m *mergeStmtGenerator) generateUpdateStatements(allCols []string, unchange

 	for _, cols := range unchangedToastColumns {
 		unchangedColsArray := strings.Split(cols, ",")
-		otherCols := utils.ArrayMinus(allCols, unchangedColsArray)
+		otherCols := shared.ArrayMinus(allCols, unchangedColsArray)
 		tmpArray := make([]string, 0, len(otherCols))
 		for _, colName := range otherCols {
 			tmpArray = append(tmpArray, fmt.Sprintf("`%s`=_d.%s", colName, m.shortColumn[colName]))
diff --git a/flow/connectors/core.go b/flow/connectors/core.go
index 47621ea2d0..ed306f73bb 100644
--- a/flow/connectors/core.go
+++ b/flow/connectors/core.go
@@ -7,6 +7,7 @@ import (

 	"github.com/jackc/pgx/v5/pgxpool"

+	"github.com/PeerDB-io/peer-flow/alerting"
 	connbigquery "github.com/PeerDB-io/peer-flow/connectors/bigquery"
 	connclickhouse "github.com/PeerDB-io/peer-flow/connectors/clickhouse"
 	conneventhub "github.com/PeerDB-io/peer-flow/connectors/eventhub"
@@ -17,7 +18,6 @@ import (
 	"github.com/PeerDB-io/peer-flow/generated/protos"
 	"github.com/PeerDB-io/peer-flow/logger"
 	"github.com/PeerDB-io/peer-flow/model"
-	"github.com/PeerDB-io/peer-flow/shared/alerting"
 )

 var ErrUnsupportedFunctionality = errors.New("requested connector does not support functionality")
diff --git a/flow/connectors/postgres/normalize_stmt_generator.go b/flow/connectors/postgres/normalize_stmt_generator.go
index 01fe11273d..f9ff1d5f07 100644
--- a/flow/connectors/postgres/normalize_stmt_generator.go
+++ b/flow/connectors/postgres/normalize_stmt_generator.go
@@ -11,6 +11,7 @@ import (
 	"github.com/PeerDB-io/peer-flow/connectors/utils"
 	"github.com/PeerDB-io/peer-flow/generated/protos"
 	"github.com/PeerDB-io/peer-flow/model/qvalue"
+	"github.com/PeerDB-io/peer-flow/shared"
 )

 type normalizeStmtGenerator struct {
@@ -201,7 +202,7 @@ func (n *normalizeStmtGenerator) generateUpdateStatements(quotedCols []string) [
 		for i, unchangedToastCol := range unchangedColsArray {
 			unchangedColsArray[i] = QuoteIdentifier(unchangedToastCol)
 		}
-		otherCols := utils.ArrayMinus(quotedCols, unchangedColsArray)
+		otherCols := shared.ArrayMinus(quotedCols, unchangedColsArray)
 		tmpArray := make([]string, 0, len(otherCols))
 		for _, colName := range otherCols {
 			tmpArray = append(tmpArray, fmt.Sprintf("%s=src.%s", colName, colName))
diff --git a/flow/connectors/postgres/postgres.go b/flow/connectors/postgres/postgres.go
index dd3f24b5a9..49afa99002 100644
--- a/flow/connectors/postgres/postgres.go
+++ b/flow/connectors/postgres/postgres.go
@@ -19,13 +19,14 @@ import (
 	"go.temporal.io/sdk/log"
 	"go.temporal.io/sdk/temporal"

+	"github.com/PeerDB-io/peer-flow/alerting"
 	"github.com/PeerDB-io/peer-flow/connectors/utils"
 	"github.com/PeerDB-io/peer-flow/connectors/utils/monitoring"
 	"github.com/PeerDB-io/peer-flow/generated/protos"
 	"github.com/PeerDB-io/peer-flow/logger"
 	"github.com/PeerDB-io/peer-flow/model"
 	"github.com/PeerDB-io/peer-flow/model/qvalue"
-	"github.com/PeerDB-io/peer-flow/shared/alerting"
+	"github.com/PeerDB-io/peer-flow/shared"
 )

 type PostgresConnector struct {
@@ -1124,7 +1125,7 @@ func (c *PostgresConnector) AddTablesToPublication(ctx context.Context, req *pro
 	if err != nil {
 		return fmt.Errorf("failed to check tables in publication: %w", err)
 	}
-	notPresentTables := utils.ArrayMinus(additionalSrcTables, tableNames)
+	notPresentTables := shared.ArrayMinus(additionalSrcTables, tableNames)
 	if len(notPresentTables) > 0 {
 		return fmt.Errorf("some additional tables not present in custom publication: %s",
 			strings.Join(notPresentTables, ", "))
diff --git a/flow/connectors/postgres/qvalue_convert.go b/flow/connectors/postgres/qvalue_convert.go
index defc2f492f..830d9b8450 100644
--- a/flow/connectors/postgres/qvalue_convert.go
+++ b/flow/connectors/postgres/qvalue_convert.go
@@ -12,8 +12,8 @@ import (
 	"github.com/jackc/pgx/v5/pgtype"
 	"github.com/lib/pq/oid"

-	"github.com/PeerDB-io/peer-flow/connectors/utils"
 	"github.com/PeerDB-io/peer-flow/model/qvalue"
+	"github.com/PeerDB-io/peer-flow/shared"
 )

 var big10 = big.NewInt(10)
@@ -208,7 +208,7 @@ func convertToArray[T any](kind qvalue.QValueKind, value interface{}) (qvalue.QV
 	case []T:
 		return qvalue.QValue{Kind: kind, Value: v}, nil
 	case []interface{}:
-		return qvalue.QValue{Kind: kind, Value: utils.ArrayCastElements[T](v)}, nil
+		return qvalue.QValue{Kind: kind, Value: shared.ArrayCastElements[T](v)}, nil
 	}
 	return qvalue.QValue{}, fmt.Errorf("failed to parse array %s from %T: %v", kind, value, value)
 }
diff --git a/flow/connectors/snowflake/merge_stmt_generator.go b/flow/connectors/snowflake/merge_stmt_generator.go
index dd166e708f..98b45f3911 100644
--- a/flow/connectors/snowflake/merge_stmt_generator.go
+++ b/flow/connectors/snowflake/merge_stmt_generator.go
@@ -8,6 +8,7 @@ import (
 	"github.com/PeerDB-io/peer-flow/generated/protos"
 	"github.com/PeerDB-io/peer-flow/model/numeric"
 	"github.com/PeerDB-io/peer-flow/model/qvalue"
+	"github.com/PeerDB-io/peer-flow/shared"
 )

 type mergeStmtGenerator struct {
@@ -178,7 +179,7 @@ func (m *mergeStmtGenerator) generateUpdateStatements(allCols []string) []string

 	for _, cols := range m.unchangedToastColumns {
 		unchangedColsArray := strings.Split(cols, ",")
-		otherCols := utils.ArrayMinus(allCols, unchangedColsArray)
+		otherCols := shared.ArrayMinus(allCols, unchangedColsArray)
 		tmpArray := make([]string, 0, len(otherCols)+2)
 		for _, colName := range otherCols {
 			normalizedColName := SnowflakeIdentifierNormalize(colName)
diff --git a/flow/e2e/test_utils.go b/flow/e2e/test_utils.go
index 598297106b..6239f7d148 100644
--- a/flow/e2e/test_utils.go
+++ b/flow/e2e/test_utils.go
@@ -23,6 +23,7 @@ import (
 	"go.temporal.io/sdk/worker"

 	"github.com/PeerDB-io/peer-flow/activities"
+	"github.com/PeerDB-io/peer-flow/alerting"
 	"github.com/PeerDB-io/peer-flow/connectors"
 	connpostgres "github.com/PeerDB-io/peer-flow/connectors/postgres"
 	connsnowflake "github.com/PeerDB-io/peer-flow/connectors/snowflake"
@@ -34,7 +35,6 @@ import (
 	"github.com/PeerDB-io/peer-flow/model"
 	"github.com/PeerDB-io/peer-flow/model/qvalue"
 	"github.com/PeerDB-io/peer-flow/shared"
-	"github.com/PeerDB-io/peer-flow/shared/alerting"
 	peerflow "github.com/PeerDB-io/peer-flow/workflows"
 )
diff --git a/flow/shared/additional_tables.go b/flow/shared/additional_tables.go
index 00dc8efa42..0eb0b79f35 100644
--- a/flow/shared/additional_tables.go
+++ b/flow/shared/additional_tables.go
@@ -1,7 +1,6 @@
 package shared

 import (
-	"github.com/PeerDB-io/peer-flow/connectors/utils"
 	"github.com/PeerDB-io/peer-flow/generated/protos"
 )

@@ -22,6 +21,6 @@ func AdditionalTablesHasOverlap(currentTableMappings []*protos.TableMapping,
 		additionalDstTables = append(additionalDstTables, additionalTableMapping.DestinationTableIdentifier)
 	}

-	return utils.ArraysHaveOverlap(currentSrcTables, additionalSrcTables) ||
-		utils.ArraysHaveOverlap(currentDstTables, additionalDstTables)
+	return ArraysHaveOverlap(currentSrcTables, additionalSrcTables) ||
+		ArraysHaveOverlap(currentDstTables, additionalDstTables)
 }
diff --git a/flow/connectors/utils/array.go b/flow/shared/array.go
similarity index 98%
rename from flow/connectors/utils/array.go
rename to flow/shared/array.go
index 2633153ae6..4acca12d4d 100644
--- a/flow/connectors/utils/array.go
+++ b/flow/shared/array.go
@@ -1,4 +1,4 @@
-package utils
+package shared

 // first - second
 func ArrayMinus[T comparable](first, second []T) []T {

From 72cfd9da408774b9f509a10dc5c502ab1b189e12 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philip=20Dub=C3=A9?=
Date: Sun, 3 Mar 2024 19:22:57 +0000
Subject: [PATCH 08/13] CDCFlowWorkflowState: remove Progress, Statuses,
 Errors (#1424)

All of these should be logged instead of put in state; this avoids
queries failing due to the state size being too large.

Since normalize/sync flows handle their own logging, reduce signalling too.

SyncFlowStatuses was being checked by the dynamic signals test to verify
exact batch numbering. Be looser: testing that data is being replicated
properly is sufficient.
---
 flow/e2e/postgres/peer_flow_pg_test.go |  5 --
 flow/model/signals.go                  | 14 +----
 flow/workflows/cdc_flow.go             | 83 ++++----------------------
 flow/workflows/normalize_flow.go       | 14 +----
 flow/workflows/sync_flow.go            | 15 +++--
 5 files changed, 23 insertions(+), 108 deletions(-)
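[Editor's note: a minimal illustration of the signal consolidation in the
diffs below. TypedSignal here is a stripped-down stand-in for the real
model.TypedSignal, which also carries signal-channel helpers; only the
nil-means-error convention is the point.]

```go
package main

import "fmt"

// TypedSignal is a stand-in for model.TypedSignal[T].
type TypedSignal[T any] struct {
	Name string
}

type SyncResponse struct {
	NumRecordsSynced int64
}

// The separate sync-error signal is gone: a nil *SyncResponse delivered on
// sync-result now means the sync failed, with details left to the logs.
var SyncResultSignal = TypedSignal[*SyncResponse]{Name: "sync-result"}

func handleSyncResult(result *SyncResponse) {
	if result == nil {
		fmt.Println("sync failed; see worker logs")
		return
	}
	fmt.Println("synced records:", result.NumRecordsSynced)
}

func main() {
	handleSyncResult(nil)
	handleSyncResult(&SyncResponse{NumRecordsSynced: 18})
}
```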
diff --git a/flow/e2e/postgres/peer_flow_pg_test.go b/flow/e2e/postgres/peer_flow_pg_test.go
index a57613edda..0ec31befe4 100644
--- a/flow/e2e/postgres/peer_flow_pg_test.go
+++ b/flow/e2e/postgres/peer_flow_pg_test.go
@@ -1209,8 +1209,6 @@ func (s PeerFlowE2ETestSuitePG) Test_Dynamic_Mirror_Config_Via_Signals() {
 	assert.Len(s.t, workflowState.SyncFlowOptions.TableMappings, 1)
 	assert.Len(s.t, workflowState.SyncFlowOptions.SrcTableIdNameMapping, 1)
 	assert.Len(s.t, workflowState.SyncFlowOptions.TableNameSchemaMapping, 1)
-	// we have limited batch size to 6, so atleast 3 syncs needed
-	assert.GreaterOrEqual(s.t, len(workflowState.SyncFlowStatuses), 3)

 	if !s.t.Failed() {
 		// wait for first RegisterDelayedCallback to hit.
@@ -1283,9 +1281,6 @@ func (s PeerFlowE2ETestSuitePG) Test_Dynamic_Mirror_Config_Via_Signals() {
 		assert.Len(s.t, workflowState.SyncFlowOptions.TableMappings, 2)
 		assert.Len(s.t, workflowState.SyncFlowOptions.SrcTableIdNameMapping, 2)
 		assert.Len(s.t, workflowState.SyncFlowOptions.TableNameSchemaMapping, 2)
-		// 3 from first insert of 18 rows in 1 table
-		// TODO 3 from second insert of 18 rows in 2 tables, batch size updated
-		assert.GreaterOrEqual(s.t, len(workflowState.SyncFlowStatuses), 4)

 		env.CancelWorkflow()
 	}()
diff --git a/flow/model/signals.go b/flow/model/signals.go
index 5e30defd63..45ec805339 100644
--- a/flow/model/signals.go
+++ b/flow/model/signals.go
@@ -134,11 +134,7 @@ var SyncStopSignal = TypedSignal[struct{}]{
 	Name: "sync-stop",
 }

-var SyncErrorSignal = TypedSignal[string]{
-	Name: "sync-error",
-}
-
-var SyncResultSignal = TypedSignal[SyncResponse]{
+var SyncResultSignal = TypedSignal[*SyncResponse]{
 	Name: "sync-result",
 }

@@ -150,14 +146,6 @@ var NormalizeSignal = TypedSignal[NormalizePayload]{
 	Name: "normalize",
 }

-var NormalizeErrorSignal = TypedSignal[string]{
-	Name: "normalize-error",
-}
-
-var NormalizeResultSignal = TypedSignal[NormalizeResponse]{
-	Name: "normalize-result",
-}
-
 var NormalizeDoneSignal = TypedSignal[struct{}]{
 	Name: "normalize-done",
 }
diff --git a/flow/workflows/cdc_flow.go b/flow/workflows/cdc_flow.go
index 1891d7f6c1..419c2df3db 100644
--- a/flow/workflows/cdc_flow.go
+++ b/flow/workflows/cdc_flow.go
@@ -22,18 +22,8 @@ import (
 )

 type CDCFlowWorkflowState struct {
-	// Progress events for the peer flow.
-	Progress []string
-	// Accumulates status for sync flows spawned.
-	SyncFlowStatuses []model.SyncResponse
-	// Accumulates status for normalize flows spawned.
-	NormalizeFlowStatuses []model.NormalizeResponse
 	// Current signalled state of the peer flow.
 	ActiveSignal model.CDCFlowSignal
-	// Errors encountered during child sync flow executions.
-	SyncFlowErrors []string
-	// Errors encountered during child sync flow executions.
-	NormalizeFlowErrors []string
 	// Global mapping of relation IDs to RelationMessages sent as a part of logical replication.
 	// Needed to support schema changes.
 	RelationMessageMapping model.RelationMessageMapping
@@ -51,15 +41,10 @@ func NewCDCFlowWorkflowState(cfg *protos.FlowConnectionConfigs) *CDCFlowWorkflow
 		tableMappings = append(tableMappings, proto.Clone(tableMapping).(*protos.TableMapping))
 	}
 	return &CDCFlowWorkflowState{
-		Progress: []string{"started"},
-		// 1 more than the limit of 10
-		SyncFlowStatuses:      make([]model.SyncResponse, 0, 11),
-		NormalizeFlowStatuses: make([]model.NormalizeResponse, 0, 11),
-		ActiveSignal:          model.NoopSignal,
-		SyncFlowErrors:        nil,
-		NormalizeFlowErrors:   nil,
-		CurrentFlowStatus:     protos.FlowStatus_STATUS_SETUP,
-		FlowConfigUpdate:      nil,
+		ActiveSignal:      model.NoopSignal,
+		CurrentFlowStatus: protos.FlowStatus_STATUS_SETUP,
+		FlowConfigUpdate:  nil,
 		SyncFlowOptions: &protos.SyncFlowOptions{
 			BatchSize:          cfg.MaxBatchSize,
 			IdleTimeoutSeconds: cfg.IdleTimeoutSeconds,
@@ -68,32 +53,6 @@ func NewCDCFlowWorkflowState(cfg *protos.FlowConnectionConfigs) *CDCFlowWorkflow
 	}
 }

-// truncate the progress and other arrays to a max of 10 elements
-func (s *CDCFlowWorkflowState) TruncateProgress(logger log.Logger) {
-	if len(s.Progress) > 10 {
-		copy(s.Progress, s.Progress[len(s.Progress)-10:])
-		s.Progress = s.Progress[:10]
-	}
-	if len(s.SyncFlowStatuses) > 10 {
-		copy(s.SyncFlowStatuses, s.SyncFlowStatuses[len(s.SyncFlowStatuses)-10:])
-		s.SyncFlowStatuses = s.SyncFlowStatuses[:10]
-	}
-	if len(s.NormalizeFlowStatuses) > 10 {
-		copy(s.NormalizeFlowStatuses, s.NormalizeFlowStatuses[len(s.NormalizeFlowStatuses)-10:])
-		s.NormalizeFlowStatuses = s.NormalizeFlowStatuses[:10]
-	}
-
-	if s.SyncFlowErrors != nil {
-		logger.Warn("SyncFlowErrors", slog.Any("errors", s.SyncFlowErrors))
-		s.SyncFlowErrors = nil
-	}
-
-	if s.NormalizeFlowErrors != nil {
-		logger.Warn("NormalizeFlowErrors", slog.Any("errors", s.NormalizeFlowErrors))
-		s.NormalizeFlowErrors = nil
-	}
-}
-
 // CDCFlowWorkflowExecution represents the state for execution of a peer flow.
 type CDCFlowWorkflowExecution struct {
 	flowExecutionID string
@@ -429,7 +388,7 @@ func CDCFlowWorkflow(
 		}

 		state.CurrentFlowStatus = protos.FlowStatus_STATUS_RUNNING
-		state.Progress = append(state.Progress, "executed setup flow and snapshot flow")
+		w.logger.Info("executed setup flow and snapshot flow")

 		// if initial_copy_only is opted for, we end the flow here.
 		if cfg.InitialSnapshotOnly {
@@ -492,7 +451,6 @@ func CDCFlowWorkflow(
 		err := f.Get(ctx, nil)
 		if err != nil {
 			handleError("sync", err)
-			state.SyncFlowErrors = append(state.SyncFlowErrors, err.Error())
 		}

 		if restart {
@@ -504,7 +462,6 @@ func CDCFlowWorkflow(
 			}).Get(ctx, nil)
 		} else {
 			w.logger.Warn("sync flow ended, restarting", slog.Any("error", err))
-			state.TruncateProgress(w.logger)
 			w.startSyncFlow(syncCtx, cfg, state.SyncFlowOptions)
 			mainLoopSelector.AddFuture(w.syncFlowFuture, handleSyncFlow)
 		}
@@ -513,7 +470,6 @@ func CDCFlowWorkflow(
 		err := f.Get(ctx, nil)
 		if err != nil {
 			handleError("normalize", err)
-			state.NormalizeFlowErrors = append(state.NormalizeFlowErrors, err.Error())
 		}

 		if restart {
@@ -522,7 +478,6 @@ func CDCFlowWorkflow(
 			finished = true
 		} else {
 			w.logger.Warn("normalize flow ended, restarting", slog.Any("error", err))
-			state.TruncateProgress(w.logger)
 			w.startNormFlow(normCtx, cfg)
 			mainLoopSelector.AddFuture(w.normFlowFuture, handleNormFlow)
 		}
@@ -538,30 +493,16 @@ func CDCFlowWorkflow(
 		state.ActiveSignal = model.FlowSignalHandler(state.ActiveSignal, val, w.logger)
 	})

-	syncErrorChan := model.SyncErrorSignal.GetSignalChannel(ctx)
-	syncErrorChan.AddToSelector(mainLoopSelector, func(err string, _ bool) {
-		syncCount += 1
-		state.SyncFlowErrors = append(state.SyncFlowErrors, err)
-	})
 	syncResultChan := model.SyncResultSignal.GetSignalChannel(ctx)
-	syncResultChan.AddToSelector(mainLoopSelector, func(result model.SyncResponse, _ bool) {
+	syncResultChan.AddToSelector(mainLoopSelector, func(result *model.SyncResponse, _ bool) {
 		syncCount += 1
-		if state.SyncFlowOptions.RelationMessageMapping == nil {
-			state.SyncFlowOptions.RelationMessageMapping = result.RelationMessageMapping
-		} else {
-			maps.Copy(state.SyncFlowOptions.RelationMessageMapping, result.RelationMessageMapping)
+		if result != nil {
+			if state.SyncFlowOptions.RelationMessageMapping == nil {
+				state.SyncFlowOptions.RelationMessageMapping = result.RelationMessageMapping
+			} else {
+				maps.Copy(state.SyncFlowOptions.RelationMessageMapping, result.RelationMessageMapping)
+			}
 		}
-		state.SyncFlowStatuses = append(state.SyncFlowStatuses, result)
-	})
-
-	normErrorChan := model.NormalizeErrorSignal.GetSignalChannel(ctx)
-	normErrorChan.AddToSelector(mainLoopSelector, func(err string, _ bool) {
-		state.NormalizeFlowErrors = append(state.NormalizeFlowErrors, err)
-	})
-
-	normResultChan := model.NormalizeResultSignal.GetSignalChannel(ctx)
-	normResultChan.AddToSelector(mainLoopSelector, func(result model.NormalizeResponse, _ bool) {
-		state.NormalizeFlowStatuses = append(state.NormalizeFlowStatuses, result)
 	})

 	normChan := model.NormalizeSignal.GetSignalChannel(ctx)
@@ -613,8 +554,6 @@ func CDCFlowWorkflow(
 			return nil, err
 		}

-		// important to control the size of inputs.
-		state.TruncateProgress(w.logger)
 		return state, workflow.NewContinueAsNewError(ctx, CDCFlowWorkflow, cfg, state)
 	}
 }
diff --git a/flow/workflows/normalize_flow.go b/flow/workflows/normalize_flow.go
index 5c87b8d05b..adc7a7991a 100644
--- a/flow/workflows/normalize_flow.go
+++ b/flow/workflows/normalize_flow.go
@@ -98,19 +98,9 @@ func NormalizeFlowWorkflow(

 			var normalizeResponse *model.NormalizeResponse
 			if err := fStartNormalize.Get(normalizeFlowCtx, &normalizeResponse); err != nil {
-				_ = model.NormalizeErrorSignal.SignalExternalWorkflow(
-					ctx,
-					parent.ID,
-					"",
-					err.Error(),
-				).Get(ctx, nil)
+				logger.Info("Normalize errored", slog.Any("error", err))
 			} else if normalizeResponse != nil {
-				_ = model.NormalizeResultSignal.SignalExternalWorkflow(
-					ctx,
-					parent.ID,
-					"",
-					*normalizeResponse,
-				).Get(ctx, nil)
+				logger.Info("Normalize finished", slog.Any("result", normalizeResponse))
 			}
 		}
diff --git a/flow/workflows/sync_flow.go b/flow/workflows/sync_flow.go
index 2ce225f260..0e4c786e2c 100644
--- a/flow/workflows/sync_flow.go
+++ b/flow/workflows/sync_flow.go
@@ -13,6 +13,9 @@ import (
 	"github.com/PeerDB-io/peer-flow/shared"
 )

+// For now cdc restarts sync flow whenever it itself restarts,
+// set this value high enough to never be met, relying on cdc restarts.
+// In the future cdc flow restarts could be decoupled from sync flow restarts.
 const (
 	maxSyncsPerSyncFlow = 64
 )
@@ -103,11 +106,11 @@ func SyncFlowWorkflow(
 		var childSyncFlowRes *model.SyncResponse
 		if err := f.Get(ctx, &childSyncFlowRes); err != nil {
 			logger.Error("failed to execute sync flow", slog.Any("error", err))
-			_ = model.SyncErrorSignal.SignalExternalWorkflow(
+			_ = model.SyncResultSignal.SignalExternalWorkflow(
 				ctx,
 				parent.ID,
 				"",
-				err.Error(),
+				nil,
 			).Get(ctx, nil)
 			syncErr = true
 		} else if childSyncFlowRes != nil {
@@ -115,7 +118,7 @@ func SyncFlowWorkflow(
 				ctx,
 				parent.ID,
 				"",
-				*childSyncFlowRes,
+				childSyncFlowRes,
 			).Get(ctx, nil)
 			options.RelationMessageMapping = childSyncFlowRes.RelationMessageMapping
 			totalRecordsSynced += childSyncFlowRes.NumRecordsSynced
@@ -145,12 +148,12 @@ func SyncFlowWorkflow(

 		var getModifiedSchemaRes *protos.GetTableSchemaBatchOutput
 		if err := getModifiedSchemaFuture.Get(ctx, &getModifiedSchemaRes); err != nil {
-			logger.Error("failed to execute schema update at source: ", err)
-			_ = model.SyncErrorSignal.SignalExternalWorkflow(
+			logger.Error("failed to execute schema update at source", slog.Any("error", err))
+			_ = model.SyncResultSignal.SignalExternalWorkflow(
 				ctx,
 				parent.ID,
 				"",
-				err.Error(),
+				nil,
 			).Get(ctx, nil)
 		} else {
 			for i, srcTable := range modifiedSrcTables {

From 5853e4e799437c4b81c17e8b69879b799b0e0d0b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philip=20Dub=C3=A9?=
Date: Mon, 4 Mar 2024 14:20:33 +0000
Subject: [PATCH 09/13] flow ci: drop temporal's sdk/testsuite in favor of
 actual integration testing (#1391)

https://github.com/temporalio/sdk-go/issues/805 cements Temporal's position
that sdk/testsuite is for unit testing, not integration testing.

See also the simplification of the dynamic signals test, where Kevin
previously had to work out some precise delayed-callback timing to get
signals right.

Instead, workflows run in another process & tests only use the Temporal
client to kick off workflows & query workflow state or check databases in
a wait-for loop.
---
 .github/workflows/flow.yml                    |   43 +-
 flow/alerting/alerting.go                     |    7 +-
 flow/cmd/api.go                               |    2 +-
 flow/cmd/cert.go                              |    2 +-
 flow/cmd/handler.go                           |    2 +-
 flow/cmd/mirror_status.go                     |    2 +-
 flow/cmd/peer_data.go                         |    2 +-
 flow/cmd/snapshot_worker.go                   |   36 +-
 flow/cmd/validate_mirror.go                   |    2 +-
 flow/cmd/validate_peer.go                     |    2 +-
 flow/cmd/version.go                           |    2 +-
 flow/cmd/worker.go                            |   41 +-
 .../postgres/postgres_schema_delta_test.go    |    9 +-
 flow/connectors/postgres/qrep_bench_test.go   |   11 +-
 .../postgres/qrep_partition_test.go           |    3 +-
 .../postgres/qrep_query_executor_test.go      |   11 +-
 flow/e2e/bigquery/peer_flow_bq_test.go        |  935 ++++++------
 flow/e2e/bigquery/qrep_flow_bq_test.go        |   38 +-
 flow/e2e/postgres/peer_flow_pg_test.go        |  949 ++++++------
 flow/e2e/postgres/qrep_flow_pg_test.go        |   39 +-
 flow/e2e/s3/cdc_s3_test.go                    |   40 +-
 flow/e2e/s3/qrep_flow_s3_test.go              |   24 +-
 flow/e2e/snowflake/peer_flow_sf_test.go       | 1266 ++++++++---------
 flow/e2e/snowflake/qrep_flow_sf_test.go       |   73 +-
 .../e2e/sqlserver/qrep_flow_sqlserver_test.go |   14 +-
 flow/e2e/test_utils.go                        |  202 +--
 flow/{cmd => }/main.go                        |   68 +-
 flow/workflows/sync_flow.go                   |    2 +-
 stacks/flow.Dockerfile                        |    6 +-
 29 files changed, 1785 insertions(+), 2048 deletions(-)
 rename flow/{cmd => }/main.go (58%)
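[Editor's note: with the mocked test environment gone, the e2e helpers poll
real state until a condition holds. This is a generic sketch of that
wait-for loop shape, not the exact EnvWaitFor implementation; the timings
and condition body are illustrative.]

```go
package main

import (
	"fmt"
	"time"
)

// waitFor polls cond until it returns true or the timeout elapses,
// the way the e2e tests now watch workflow state or destination tables.
func waitFor(timeout time.Duration, reason string, cond func() bool) error {
	deadline := time.Now().Add(timeout)
	for !cond() {
		if time.Now().After(deadline) {
			return fmt.Errorf("timed out waiting for %s", reason)
		}
		time.Sleep(time.Second)
	}
	return nil
}

func main() {
	start := time.Now()
	err := waitFor(time.Minute, "rows replicated", func() bool {
		// stands in for comparing source and destination tables
		return time.Since(start) > 2*time.Second
	})
	fmt.Println(err) // <nil>
}
```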
diff --git a/.github/workflows/flow.yml b/.github/workflows/flow.yml
index 0a398c8de4..5f6899b15b 100644
--- a/.github/workflows/flow.yml
+++ b/.github/workflows/flow.yml
@@ -12,22 +12,17 @@ jobs:
       matrix:
         runner: [ubicloud-standard-16-ubuntu-2204-arm]
     runs-on: ${{ matrix.runner }}
-    timeout-minutes: 30
+    timeout-minutes: 40
     services:
-      pg_cdc:
+      catalog:
         image: imresamu/postgis:15-3.4-alpine
         ports:
-          - 7132:5432
+          - 5432:5432
         env:
-          POSTGRES_USER: postgres
+          PGUSER: postgres
           POSTGRES_PASSWORD: postgres
           POSTGRES_DB: postgres
-        options: >-
-          --name pg_cdc
-          --health-cmd pg_isready
-          --health-interval 10s
-          --health-timeout 5s
-          --health-retries 5
+
     steps:
       - name: checkout sources
         uses: actions/checkout@v4
@@ -43,18 +38,12 @@ jobs:
           go-version: "1.22"
           cache-dependency-path: flow/go.sum

-      - name: install gotestsum
-        run: |
-          go install gotest.tools/gotestsum@latest
-
       - name: install lib-geos
         run: |
           sudo apt-get update
           sudo apt-get install libgeos-dev

-      - name: download go modules
-        run: |
-          go mod download
+      - run: go mod download
         working-directory: ./flow

       - name: setup gcp service account
@@ -87,21 +76,27 @@ jobs:

       - name: create hstore extension, increase logical replication limits, and setup catalog database
         run: >
-          docker exec pg_cdc psql -h localhost -p 5432 -U postgres -c "CREATE EXTENSION hstore;"
+          docker exec "${{ job.services.catalog.id }}" psql -U postgres -c "CREATE EXTENSION hstore;"
           -c "ALTER SYSTEM SET wal_level=logical;"
           -c "ALTER SYSTEM SET max_replication_slots=192;"
           -c "ALTER SYSTEM SET max_wal_senders=256;"
           -c "ALTER SYSTEM SET max_connections=2048;" &&
-          (cat ../nexus/catalog/migrations/V{?,??}__* | docker exec -i pg_cdc psql -h localhost -p 5432 -U postgres) &&
-          docker restart pg_cdc
-        working-directory: ./flow
+          (cat ./nexus/catalog/migrations/V{?,??}__* | docker exec -i "${{ job.services.catalog.id }}" psql -U postgres) &&
+          docker restart "${{ job.services.catalog.id }}"
         env:
-          PG_CDC: empty
           PGPASSWORD: postgres

+      - name: Install Temporal CLI
+        uses: temporalio/setup-temporal@v0
+
       - name: run tests
         run: |
-          gotestsum --format testname -- -p 24 ./... -timeout 1200s
+          temporal server start-dev --namespace default --headless &
+          go build -ldflags="-s -w" -o peer-flow
+          temporal operator search-attribute create --name MirrorName --type Text --namespace default
+          ./peer-flow worker &
+          ./peer-flow snapshot-worker &
+          go test -p 32 ./... -timeout 1200s
         working-directory: ./flow
         env:
           AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
@@ -122,7 +117,7 @@ jobs:
           SQLSERVER_PASSWORD: ${{ secrets.SQLSERVER_PASSWORD }}
           SQLSERVER_DB: ${{ secrets.SQLSERVER_DB }}
           PEERDB_CATALOG_HOST: localhost
-          PEERDB_CATALOG_PORT: 7132
+          PEERDB_CATALOG_PORT: 5432
           PEERDB_CATALOG_USER: postgres
           PEERDB_CATALOG_PASSWORD: postgres
           PEERDB_CATALOG_DATABASE: postgres
diff --git a/flow/alerting/alerting.go b/flow/alerting/alerting.go
index 4e95f1d5d1..a837f58889 100644
--- a/flow/alerting/alerting.go
+++ b/flow/alerting/alerting.go
@@ -3,7 +3,6 @@ package alerting
 import (
 	"context"
 	"encoding/json"
-	"errors"
 	"fmt"
 	"log/slog"
 	"time"
@@ -51,14 +50,14 @@ func (a *Alerter) registerSendersFromPool(ctx context.Context) ([]*slackAlertSen
 }

 // doesn't take care of closing pool, needs to be done externally.
-func NewAlerter(catalogPool *pgxpool.Pool) (*Alerter, error) {
+func NewAlerter(catalogPool *pgxpool.Pool) *Alerter {
 	if catalogPool == nil {
-		return nil, errors.New("catalog pool is nil for Alerter")
+		panic("catalog pool is nil for Alerter")
 	}

 	return &Alerter{
 		catalogPool: catalogPool,
-	}, nil
+	}
 }

 func (a *Alerter) AlertIfSlotLag(ctx context.Context, peerName string, slotInfo *protos.SlotInfo) {
diff --git a/flow/cmd/api.go b/flow/cmd/api.go
index f24178efda..5803d42317 100644
--- a/flow/cmd/api.go
+++ b/flow/cmd/api.go
@@ -1,4 +1,4 @@
-package main
+package cmd

 import (
 	"context"
diff --git a/flow/cmd/cert.go b/flow/cmd/cert.go
index c750378926..9031d55b6c 100644
--- a/flow/cmd/cert.go
+++ b/flow/cmd/cert.go
@@ -1,4 +1,4 @@
-package main
+package cmd

 import (
 	"crypto/tls"
diff --git a/flow/cmd/handler.go b/flow/cmd/handler.go
index 6519373181..f4a7a7cc62 100644
--- a/flow/cmd/handler.go
+++ b/flow/cmd/handler.go
@@ -1,4 +1,4 @@
-package main
+package cmd

 import (
 	"context"
diff --git a/flow/cmd/mirror_status.go b/flow/cmd/mirror_status.go
index 70dee3c6eb..e8160277f5 100644
--- a/flow/cmd/mirror_status.go
+++ b/flow/cmd/mirror_status.go
@@ -1,4 +1,4 @@
-package main
+package cmd

 import (
 	"context"
diff --git a/flow/cmd/peer_data.go b/flow/cmd/peer_data.go
index eb45003b41..f4846280ed 100644
--- a/flow/cmd/peer_data.go
+++ b/flow/cmd/peer_data.go
@@ -1,4 +1,4 @@
-package main
+package cmd

 import (
 	"context"
diff --git a/flow/cmd/snapshot_worker.go b/flow/cmd/snapshot_worker.go
index 07cf5a19dc..eb9021de1a 100644
--- a/flow/cmd/snapshot_worker.go
+++ b/flow/cmd/snapshot_worker.go
@@ -1,4 +1,4 @@
-package main
+package cmd

 import (
 	"context"
@@ -25,7 +25,7 @@ type SnapshotWorkerOptions struct {
 	TemporalKey      string
 }

-func SnapshotWorkerMain(opts *SnapshotWorkerOptions) error {
+func SnapshotWorkerMain(opts *SnapshotWorkerOptions) (client.Client, worker.Worker, error) {
 	clientOptions := client.Options{
 		HostPort:  opts.TemporalHostPort,
 		Namespace: opts.TemporalNamespace,
@@ -35,7 +35,7 @@ func SnapshotWorkerMain(opts *SnapshotWorkerOptions) error {
 	if opts.TemporalCert != "" && opts.TemporalKey != "" {
 		certs, err := Base64DecodeCertAndKey(opts.TemporalCert, opts.TemporalKey)
 		if err != nil {
-			return fmt.Errorf("unable to process certificate and key: %w", err)
+			return nil, nil, fmt.Errorf("unable to process certificate and key: %w", err)
 		}

 		connOptions := client.ConnectionOptions{
@@ -47,37 +47,29 @@ func SnapshotWorkerMain(opts *SnapshotWorkerOptions) error {
 		clientOptions.ConnectionOptions = connOptions
 	}

+	conn, err := utils.GetCatalogConnectionPoolFromEnv(context.Background())
+	if err != nil {
+		return nil, nil, fmt.Errorf("unable to create catalog connection pool: %w", err)
+	}
+
 	c, err := client.Dial(clientOptions)
 	if err != nil {
-		return fmt.Errorf("unable to create Temporal client: %w", err)
+		return nil, nil, fmt.Errorf("unable to create Temporal client: %w", err)
 	}
-	defer c.Close()

 	taskQueue := shared.GetPeerFlowTaskQueueName(shared.SnapshotFlowTaskQueue)
 	w := worker.New(c, taskQueue, worker.Options{
 		EnableSessionWorker: true,
+		OnFatalError: func(err error) {
+			slog.Error("Snapshot Worker failed", slog.Any("error", err))
+		},
 	})

-	conn, err := utils.GetCatalogConnectionPoolFromEnv(context.Background())
-	if err != nil {
-		return fmt.Errorf("unable to create catalog connection pool: %w", err)
-	}
-
-	alerter, err := alerting.NewAlerter(conn)
-	if err != nil {
-		return fmt.Errorf("unable to create alerter: %w", err)
-	}
-
 	w.RegisterWorkflow(peerflow.SnapshotFlowWorkflow)
 	w.RegisterActivity(&activities.SnapshotActivity{
 		SnapshotConnections: make(map[string]activities.SlotSnapshotSignal),
-		Alerter:             alerter,
+		Alerter:             alerting.NewAlerter(conn),
 	})

-	err = w.Run(worker.InterruptCh())
-	if err != nil {
-		return fmt.Errorf("worker run error: %w", err)
-	}
-
-	return nil
+	return c, w, nil
 }
diff --git a/flow/cmd/validate_mirror.go b/flow/cmd/validate_mirror.go
index 0ef515a14e..f3169d927d 100644
--- a/flow/cmd/validate_mirror.go
+++ b/flow/cmd/validate_mirror.go
@@ -1,4 +1,4 @@
-package main
+package cmd

 import (
 	"context"
diff --git a/flow/cmd/validate_peer.go b/flow/cmd/validate_peer.go
index 2ef7d6c063..5bbc1cfb2c 100644
--- a/flow/cmd/validate_peer.go
+++ b/flow/cmd/validate_peer.go
@@ -1,4 +1,4 @@
-package main
+package cmd

 import (
 	"context"
diff --git a/flow/cmd/version.go b/flow/cmd/version.go
index 3338a20e6a..577ab44994 100644
--- a/flow/cmd/version.go
+++ b/flow/cmd/version.go
@@ -1,4 +1,4 @@
-package main
+package cmd

 import (
 	"context"
diff --git a/flow/cmd/worker.go b/flow/cmd/worker.go
index 476a274de6..4014d47596 100644
--- a/flow/cmd/worker.go
+++ b/flow/cmd/worker.go
@@ -1,4 +1,4 @@
-package main
+package cmd

 import (
 	"context"
@@ -7,9 +7,7 @@ import (
 	"log"
 	"log/slog"
 	"os"
-	"os/signal"
 	"runtime"
-	"syscall"

 	"github.com/grafana/pyroscope-go"
 	"go.temporal.io/sdk/client"
@@ -74,22 +72,11 @@ func setupPyroscope(opts *WorkerOptions) {
 	}
 }

-func WorkerMain(opts *WorkerOptions) error {
+func WorkerMain(opts *WorkerOptions) (client.Client, worker.Worker, error) {
 	if opts.EnableProfiling {
 		setupPyroscope(opts)
 	}

-	go func() {
-		sigs := make(chan os.Signal, 1)
-		signal.Notify(sigs, syscall.SIGQUIT)
-		buf := make([]byte, 1<<20)
-		for {
-			<-sigs
-			stacklen := runtime.Stack(buf, true)
-			log.Printf("=== received SIGQUIT ===\n*** goroutine dump...\n%s\n*** end\n", buf[:stacklen])
-		}
-	}()
-
 	clientOptions := client.Options{
 		HostPort:  opts.TemporalHostPort,
 		Namespace: opts.TemporalNamespace,
@@ -100,7 +87,7 @@ func WorkerMain(opts *WorkerOptions) error {
 		slog.Info("Using temporal certificate/key for authentication")
 		certs, err := Base64DecodeCertAndKey(opts.TemporalCert, opts.TemporalKey)
 		if err != nil {
-			return fmt.Errorf("unable to process certificate and key: %w", err)
+			return nil, nil, fmt.Errorf("unable to process certificate and key: %w", err)
 		}
 		connOptions := client.ConnectionOptions{
 			TLS: &tls.Config{
@@ -113,37 +100,29 @@ func WorkerMain(opts *WorkerOptions) error {

 	conn, err := utils.GetCatalogConnectionPoolFromEnv(context.Background())
 	if err != nil {
-		return fmt.Errorf("unable to create catalog connection pool: %w", err)
+		return nil, nil, fmt.Errorf("unable to create catalog connection pool: %w", err)
 	}

 	c, err := client.Dial(clientOptions)
 	if err != nil {
-		return fmt.Errorf("unable to create Temporal client: %w", err)
+		return nil, nil, fmt.Errorf("unable to create Temporal client: %w", err)
 	}
 	slog.Info("Created temporal client")
-	defer c.Close()

 	taskQueue := shared.GetPeerFlowTaskQueueName(shared.PeerFlowTaskQueue)
 	w := worker.New(c, taskQueue, worker.Options{
 		EnableSessionWorker: true,
+		OnFatalError: func(err error) {
+			slog.Error("Peerflow Worker failed", slog.Any("error", err))
+		},
 	})
 	peerflow.RegisterFlowWorkerWorkflows(w)

-	alerter, err := alerting.NewAlerter(conn)
-	if err != nil {
-		return fmt.Errorf("unable to create alerter: %w", err)
-	}
-
 	w.RegisterActivity(&activities.FlowableActivity{
 		CatalogPool: conn,
-		Alerter:     alerter,
+		Alerter:     alerting.NewAlerter(conn),
 		CdcCache:    make(map[string]connectors.CDCPullConnector),
 	})

-	err = w.Run(worker.InterruptCh())
-	if err != nil {
-		return fmt.Errorf("worker run error: %w", err)
-	}
-
-	return nil
+	return c, w, nil
 }
diff --git a/flow/connectors/postgres/postgres_schema_delta_test.go b/flow/connectors/postgres/postgres_schema_delta_test.go
index 6196cbc91f..c059c36836 100644
--- a/flow/connectors/postgres/postgres_schema_delta_test.go
+++ b/flow/connectors/postgres/postgres_schema_delta_test.go
@@ -9,6 +9,7 @@ import (
 	"github.com/jackc/pgx/v5"
 	"github.com/stretchr/testify/require"

+	"github.com/PeerDB-io/peer-flow/connectors/utils/catalog"
 	"github.com/PeerDB-io/peer-flow/e2eshared"
 	"github.com/PeerDB-io/peer-flow/generated/protos"
 	"github.com/PeerDB-io/peer-flow/model/qvalue"
@@ -24,13 +25,7 @@ type PostgresSchemaDeltaTestSuite struct {

 func SetupSuite(t *testing.T) PostgresSchemaDeltaTestSuite {
 	t.Helper()

-	connector, err := NewPostgresConnector(context.Background(), &protos.PostgresConfig{
-		Host:     "localhost",
-		Port:     7132,
-		User:     "postgres",
-		Password: "postgres",
-		Database: "postgres",
-	})
+	connector, err := NewPostgresConnector(context.Background(), utils.GetCatalogPostgresConfigFromEnv())
 	require.NoError(t, err)

 	setupTx, err := connector.conn.Begin(context.Background())
diff --git a/flow/connectors/postgres/qrep_bench_test.go b/flow/connectors/postgres/qrep_bench_test.go
index 406d74a00b..f5882bf299 100644
--- a/flow/connectors/postgres/qrep_bench_test.go
+++ b/flow/connectors/postgres/qrep_bench_test.go
@@ -4,21 +4,14 @@ import (
 	"context"
 	"testing"

-	"github.com/PeerDB-io/peer-flow/generated/protos"
+	"github.com/PeerDB-io/peer-flow/connectors/utils/catalog"
 )

 func BenchmarkQRepQueryExecutor(b *testing.B) {
 	query := "SELECT * FROM bench.large_table"

 	ctx := context.Background()
-	connector, err := NewPostgresConnector(ctx,
-		&protos.PostgresConfig{
-			Host:     "localhost",
-			Port:     7132,
-			User:     "postgres",
-			Password: "postgres",
-			Database: "postgres",
-		})
+	connector, err := NewPostgresConnector(ctx, utils.GetCatalogPostgresConfigFromEnv())
 	if err != nil {
 		b.Fatalf("failed to create connection: %v", err)
 	}
diff --git a/flow/connectors/postgres/qrep_partition_test.go b/flow/connectors/postgres/qrep_partition_test.go
index 9ed69968a4..0512c68415 100644
--- a/flow/connectors/postgres/qrep_partition_test.go
+++ b/flow/connectors/postgres/qrep_partition_test.go
@@ -11,6 +11,7 @@ import (
 	"github.com/stretchr/testify/assert"
 	"go.temporal.io/sdk/log"

+	"github.com/PeerDB-io/peer-flow/connectors/utils/catalog"
 	"github.com/PeerDB-io/peer-flow/generated/protos"
 	"github.com/PeerDB-io/peer-flow/shared"
 )
@@ -63,7 +64,7 @@ func newTestCaseForCTID(schema string, name string, rows uint32, expectedNum int
 }

 func TestGetQRepPartitions(t *testing.T) {
-	const connStr = "postgres://postgres:postgres@localhost:7132/postgres"
+	connStr := utils.GetCatalogConnectionStringFromEnv()

 	// Setup the DB
 	config, err := pgx.ParseConfig(connStr)
diff --git a/flow/connectors/postgres/qrep_query_executor_test.go b/flow/connectors/postgres/qrep_query_executor_test.go
index 24eb88a0a9..c8ceaee9a2 100644
--- a/flow/connectors/postgres/qrep_query_executor_test.go
+++ b/flow/connectors/postgres/qrep_query_executor_test.go
@@ -11,20 +11,13 @@ import (
 	"github.com/google/uuid"
 	"github.com/jackc/pgx/v5"

-	"github.com/PeerDB-io/peer-flow/generated/protos"
+	"github.com/PeerDB-io/peer-flow/connectors/utils/catalog"
 )

 func setupDB(t *testing.T) (*PostgresConnector, string) {
 	t.Helper()

-	connector, err := NewPostgresConnector(context.Background(),
-		&protos.PostgresConfig{
-			Host:     "localhost",
-			Port:     7132,
-			User:     "postgres",
-			Password: "postgres",
-			Database: "postgres",
-		})
+	connector, err := NewPostgresConnector(context.Background(), utils.GetCatalogPostgresConfigFromEnv())
 	if err != nil {
 		t.Fatalf("unable to create connector: %v", err)
 	}
diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go
index 74a1949ff7..14e95deda2 100644
--- a/flow/e2e/bigquery/peer_flow_bq_test.go
+++ b/flow/e2e/bigquery/peer_flow_bq_test.go
@@ -194,20 +194,18 @@ func setupSuite(t *testing.T) PeerFlowE2ETestSuiteBQ {
 }

 func (s PeerFlowE2ETestSuiteBQ) Test_Invalid_Connection_Config() {
-	env := e2e.NewTemporalTestWorkflowEnvironment(s.t)
+	tc := e2e.NewTemporalClient(s.t)

-	env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, nil, nil)
-
-	// Verify workflow completes
-	require.True(s.t, env.IsWorkflowCompleted())
-	err := env.GetWorkflowError()
+	env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, nil, nil)
+	e2e.EnvWaitForFinished(s.t, env, 3*time.Minute)
+	require.Error(s.t, env.Error())

 	// assert that error contains "invalid connection configs"
-	require.Contains(s.t, err.Error(), "invalid connection configs")
+	require.Contains(s.t, env.Error().Error(), "invalid connection configs")
 }

 func (s PeerFlowE2ETestSuiteBQ) Test_Complete_Flow_No_Data() {
-	env := e2e.NewTemporalTestWorkflowEnvironment(s.t)
+	tc := e2e.NewTemporalClient(s.t)

 	srcTableName := s.attachSchemaSuffix("test_no_data")
 	dstTableName := "test_no_data"
@@ -231,18 +229,15 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Complete_Flow_No_Data() {
 	flowConnConfig := connectionGen.GenerateFlowConnectionConfigs()
 	flowConnConfig.MaxBatchSize = 1

-	go func() {
-		e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen)
-		e2e.EnvWaitForEqualTables(env, s, "create table", dstTableName, "id,key,value")
-		env.CancelWorkflow()
-	}()
-
-	env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil)
+	env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil)
+	e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen)
+	e2e.EnvWaitForEqualTables(env, s, "create table", dstTableName, "id,key,value")
+	env.Cancel()
 	e2e.RequireEnvCanceled(s.t, env)
 }

 func (s PeerFlowE2ETestSuiteBQ) Test_Char_ColType_Error() {
-	env := e2e.NewTemporalTestWorkflowEnvironment(s.t)
+	tc := e2e.NewTemporalClient(s.t)

 	srcTableName := s.attachSchemaSuffix("test_char_coltype")
 	dstTableName := "test_char_coltype"
@@ -266,13 +261,10 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Char_ColType_Error() {
 	flowConnConfig := connectionGen.GenerateFlowConnectionConfigs()
 	flowConnConfig.MaxBatchSize = 1

-	go func() {
-		e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen)
-		e2e.EnvWaitForEqualTables(env, s, "create
table", dstTableName, "id,key,value") - env.CancelWorkflow() - }() - - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + e2e.EnvWaitForEqualTables(env, s, "create table", dstTableName, "id,key,value") + env.Cancel() e2e.RequireEnvCanceled(s.t, env) } @@ -280,7 +272,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Char_ColType_Error() { // The test inserts 10 rows into the source table and verifies that the data is // correctly synced to the destination table after sync flow completes. func (s PeerFlowE2ETestSuiteBQ) Test_Complete_Simple_Flow_BQ() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) srcTableName := s.attachSchemaSuffix("test_simple_flow_bq") dstTableName := "test_simple_flow_bq" @@ -304,29 +296,28 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Complete_Simple_Flow_BQ() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - // insert 10 rows into the source table - for i := range 10 { - testKey := fmt.Sprintf("test_key_%d", i) - testValue := fmt.Sprintf("test_value_%d", i) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + + // insert 10 rows into the source table + for i := range 10 { + testKey := fmt.Sprintf("test_key_%d", i) + testValue := fmt.Sprintf("test_value_%d", i) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(key, value) VALUES ($1, $2) `, srcTableName), testKey, testValue) - e2e.EnvNoError(s.t, env, err) - } - s.t.Log("Inserted 10 rows into the source table") + e2e.EnvNoError(s.t, env, err) + } + s.t.Log("Inserted 10 rows into the source table") - e2e.EnvWaitForEqualTables(env, s, "normalize inserts", dstTableName, "id,key,value") - env.CancelWorkflow() - }() + e2e.EnvWaitForEqualTables(env, s, "normalize inserts", dstTableName, "id,key,value") - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) + env.Cancel() e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuiteBQ) Test_Toast_BQ() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) srcTableName := s.attachSchemaSuffix("test_toast_bq_1") dstTableName := "test_toast_bq_1" @@ -351,17 +342,17 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Toast_BQ() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and execute a transaction touching toast columns - go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - /* - Executing a transaction which - 1. changes both toast column - 2. changes no toast column - 2. changes 1 toast column - */ - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + /* + Executing a transaction which + 1. changes both toast column + 2. changes no toast column + 2. 
changes 1 toast column + */ + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` BEGIN; INSERT INTO %s(t1,t2,k) SELECT random_string(9000),random_string(9000), 1 FROM generate_series(1,2); @@ -369,19 +360,16 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Toast_BQ() { UPDATE %s SET t1='dummy' WHERE id=2; END; `, srcTableName, srcTableName, srcTableName)) - e2e.EnvNoError(s.t, env, err) - s.t.Log("Executed a transaction touching toast columns") - - e2e.EnvWaitForEqualTables(env, s, "normalize tx", dstTableName, "id,t1,t2,k") - env.CancelWorkflow() - }() + e2e.EnvNoError(s.t, env, err) + s.t.Log("Executed a transaction touching toast columns") - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.EnvWaitForEqualTables(env, s, "normalize tx", dstTableName, "id,t1,t2,k") + env.Cancel() e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_1_BQ() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) srcTableName := s.attachSchemaSuffix("test_toast_bq_3") dstTableName := "test_toast_bq_3" @@ -406,12 +394,12 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_1_BQ() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and execute a transaction touching toast columns - go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - // complex transaction with random DMLs on a table with toast columns - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + // complex transaction with random DMLs on a table with toast columns + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` BEGIN; INSERT INTO %s(t1,t2,k) SELECT random_string(9000),random_string(9000), 1 FROM generate_series(1,2); @@ -429,20 +417,17 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_1_BQ() { DELETE FROM %s WHERE id=2; END; `, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, - srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName)) - e2e.EnvNoError(s.t, env, err) - s.t.Log("Executed a transaction touching toast columns") + srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName)) + e2e.EnvNoError(s.t, env, err) + s.t.Log("Executed a transaction touching toast columns") - e2e.EnvWaitForEqualTables(env, s, "normalizing tx", dstTableName, "id,t1,t2,k") - env.CancelWorkflow() - }() - - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.EnvWaitForEqualTables(env, s, "normalizing tx", dstTableName, "id,t1,t2,k") + env.Cancel() e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_2_BQ() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) srcTableName := s.attachSchemaSuffix("test_toast_bq_4") dstTableName := "test_toast_bq_4" @@ -466,12 +451,12 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_2_BQ() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and execute a transaction touching toast columns - go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, 
connectionGen) - // complex transaction with random DMLs on a table with toast columns - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + // complex transaction with random DMLs on a table with toast columns + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` BEGIN; INSERT INTO %s(t1,k) SELECT random_string(9000), 1 FROM generate_series(1,1); @@ -484,18 +469,15 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_2_BQ() { UPDATE %s SET k=4 WHERE id=1; END; `, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName)) - e2e.EnvNoError(s.t, env, err) - s.t.Log("Executed a transaction touching toast columns") - e2e.EnvWaitForEqualTables(env, s, "normalizing tx", dstTableName, "id,t1,k") - env.CancelWorkflow() - }() - - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.EnvNoError(s.t, env, err) + s.t.Log("Executed a transaction touching toast columns") + e2e.EnvWaitForEqualTables(env, s, "normalizing tx", dstTableName, "id,t1,k") + env.Cancel() e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_3_BQ() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) srcTableName := s.attachSchemaSuffix("test_toast_bq_5") dstTableName := "test_toast_bq_5" @@ -520,15 +502,15 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_3_BQ() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and execute a transaction touching toast columns - go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - /* - transaction updating a single row - multiple times with changed/unchanged toast columns - */ - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + /* + transaction updating a single row + multiple times with changed/unchanged toast columns + */ + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` BEGIN; INSERT INTO %s(t1,t2,k) SELECT random_string(9000),random_string(9000), 1 FROM generate_series(1,1); @@ -537,18 +519,15 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_3_BQ() { UPDATE %s SET t2='dummy' WHERE id=1; END; `, srcTableName, srcTableName, srcTableName, srcTableName)) - e2e.EnvNoError(s.t, env, err) - s.t.Log("Executed a transaction touching toast columns") - e2e.EnvWaitForEqualTables(env, s, "normalizing tx", dstTableName, "id,t1,t2,k") - env.CancelWorkflow() - }() - - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.EnvNoError(s.t, env, err) + s.t.Log("Executed a transaction touching toast columns") + e2e.EnvWaitForEqualTables(env, s, "normalizing tx", dstTableName, "id,t1,t2,k") + env.Cancel() e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuiteBQ) Test_Types_BQ() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) srcTableName := s.attachSchemaSuffix("test_types_bq") dstTableName := "test_types_bq" @@ -580,12 +559,12 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Types_BQ() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - // in a separate goroutine, wait for 
PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and execute a transaction touching toast columns - go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - /* test inserting various types*/ - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + /* test inserting various types*/ + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s SELECT 2,2,b'1',b'101', true,random_bytea(32),'s','test','1.1.10.2'::cidr, CURRENT_DATE,1.23,1.234,'10.0.0.0/32'::inet,1, @@ -605,46 +584,43 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Types_BQ() { '{true, false}'::boolean[], '{1, 2}'::smallint[]; `, srcTableName)) - e2e.EnvNoError(s.t, env, err) - - e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize types", func() bool { - noNulls, err := s.bqHelper.CheckNull(dstTableName, []string{ - "c41", "c1", "c2", "c3", "c4", - "c6", "c39", "c40", "id", "c9", "c11", "c12", "c13", "c14", "c15", "c16", "c17", "c18", - "c21", "c22", "c23", "c24", "c28", "c29", "c30", "c31", "c33", "c34", "c35", "c36", - "c37", "c38", "c7", "c8", "c32", "c42", "c43", "c44", "c45", "c46", "c47", "c48", - "c49", "c50", "c51", - }) - if err != nil { - s.t.Log(err) - return false - } - - // check if JSON on bigquery side is a good JSON - if err := s.checkJSONValue(dstTableName, "c17", "sai", "-8.021390374331551"); err != nil { - return false - } + e2e.EnvNoError(s.t, env, err) + + e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize types", func() bool { + noNulls, err := s.bqHelper.CheckNull(dstTableName, []string{ + "c41", "c1", "c2", "c3", "c4", + "c6", "c39", "c40", "id", "c9", "c11", "c12", "c13", "c14", "c15", "c16", "c17", "c18", + "c21", "c22", "c23", "c24", "c28", "c29", "c30", "c31", "c33", "c34", "c35", "c36", + "c37", "c38", "c7", "c8", "c32", "c42", "c43", "c44", "c45", "c46", "c47", "c48", + "c49", "c50", "c51", + }) + if err != nil { + s.t.Log(err) + return false + } - // check if HSTORE on bigquery side is a good JSON - if err := s.checkJSONValue(dstTableName, "c46", "key1", "\"value1\""); err != nil { - return false - } - if err := s.checkJSONValue(dstTableName, "c46", "key2", "null"); err != nil { - return false - } + // check if JSON on bigquery side is a good JSON + if err := s.checkJSONValue(dstTableName, "c17", "sai", "-8.021390374331551"); err != nil { + return false + } - return noNulls - }) + // check if HSTORE on bigquery side is a good JSON + if err := s.checkJSONValue(dstTableName, "c46", "key1", "\"value1\""); err != nil { + return false + } + if err := s.checkJSONValue(dstTableName, "c46", "key2", "null"); err != nil { + return false + } - env.CancelWorkflow() - }() + return noNulls + }) - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) + env.Cancel() e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuiteBQ) Test_NaN_Doubles_BQ() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) srcTableName := s.attachSchemaSuffix("test_nans_bq") dstTableName := "test_nans_bq" @@ -663,29 +639,28 @@ func (s PeerFlowE2ETestSuiteBQ) Test_NaN_Doubles_BQ() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and execute a transaction touching toast columns - go func() { - 
e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - /* test inserting various types*/ - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + + // test inserting various types + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s SELECT 2, 'NaN'::double precision, '{NaN, Infinity, -Infinity}'; `, srcTableName)) - e2e.EnvNoError(s.t, env, err) + e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize weird floats", func() bool { - good, err := s.bqHelper.CheckDoubleValues(dstTableName, "c1", "c2") - return err == nil && good - }) - env.CancelWorkflow() - }() + e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize weird floats", func() bool { + good, err := s.bqHelper.CheckDoubleValues(dstTableName, "c1", "c2") + return err == nil && good + }) - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) + env.Cancel() e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuiteBQ) Test_Invalid_Geo_BQ_Avro_CDC() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) srcTableName := s.attachSchemaSuffix("test_invalid_geo_bq_avro_cdc") dstTableName := "test_invalid_geo_bq_avro_cdc" @@ -709,64 +684,61 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Invalid_Geo_BQ_Avro_CDC() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and then insert 10 rows into the source table - go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - // insert 4 invalid shapes and 6 valid shapes into the source table - for range 4 { - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + // insert 4 invalid shapes and 6 valid shapes into the source table + for range 4 { + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s (line,"polyPoly") VALUES ($1,$2) `, srcTableName), "010200000001000000000000000000F03F0000000000000040", - "0103000020e6100000010000000c0000001a8361d35dc64140afdb8d2b1bc3c9bf1b8ed4685fc641405ba64c"+ - "579dc2c9bf6a6ad95a5fc64140cd82767449c2c9bf9570fbf85ec641408a07944db9c2c9bf729a18a55ec6414021b8b748c7c2c9bfba46de4c"+ - "5fc64140f2567052abc2c9bf2df9c5925fc641409394e16573c2c9bf2df9c5925fc6414049eceda9afc1c9bfdd1cc1a05fc64140fe43faedebc0"+ - "c9bf4694f6065fc64140fe43faedebc0c9bfffe7305f5ec641406693d6f2ddc0c9bf1a8361d35dc64140afdb8d2b1bc3c9bf", - ) - e2e.EnvNoError(s.t, env, err) - } - s.t.Log("Inserted 4 invalid geography rows into the source table") - for range 6 { - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + "0103000020e6100000010000000c0000001a8361d35dc64140afdb8d2b1bc3c9bf1b8ed4685fc641405ba64c"+ + "579dc2c9bf6a6ad95a5fc64140cd82767449c2c9bf9570fbf85ec641408a07944db9c2c9bf729a18a55ec6414021b8b748c7c2c9bfba46de4c"+ + "5fc64140f2567052abc2c9bf2df9c5925fc641409394e16573c2c9bf2df9c5925fc6414049eceda9afc1c9bfdd1cc1a05fc64140fe43faedebc0"+ + "c9bf4694f6065fc64140fe43faedebc0c9bfffe7305f5ec641406693d6f2ddc0c9bf1a8361d35dc64140afdb8d2b1bc3c9bf", + ) + e2e.EnvNoError(s.t, env, err) + } + s.t.Log("Inserted 4 invalid geography rows into the source table") + for range 6 { + _, err = s.Conn().Exec(context.Background(), 
fmt.Sprintf(` INSERT INTO %s (line,"polyPoly") VALUES ($1,$2) `, srcTableName), "010200000002000000000000000000F03F000000000000004000000000000008400000000000001040", - "010300000001000000050000000000000000000000000000000000000000000000"+ - "00000000000000000000f03f000000000000f03f000000000000f03f0000000000"+ - "00f03f000000000000000000000000000000000000000000000000") - e2e.EnvNoError(s.t, env, err) - } - s.t.Log("Inserted 6 valid geography rows and 10 total rows into source") - - e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize shapes", func() bool { - // We inserted 4 invalid shapes in each, - // which should be filtered out as null on destination. - lineCount, err := s.bqHelper.countRowsWithDataset(s.bqHelper.Config.DatasetId, dstTableName, "line") - if err != nil { - return false - } + "010300000001000000050000000000000000000000000000000000000000000000"+ + "00000000000000000000f03f000000000000f03f000000000000f03f0000000000"+ + "00f03f000000000000000000000000000000000000000000000000") + e2e.EnvNoError(s.t, env, err) + } + s.t.Log("Inserted 6 valid geography rows and 10 total rows into source") - polyCount, err := s.bqHelper.countRowsWithDataset(s.bqHelper.Config.DatasetId, dstTableName, "`polyPoly`") - if err != nil { - return false - } + e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize shapes", func() bool { + // We inserted 4 invalid shapes in each, + // which should be filtered out as null on destination. + lineCount, err := s.bqHelper.countRowsWithDataset(s.bqHelper.Config.DatasetId, dstTableName, "line") + if err != nil { + return false + } - if lineCount != 6 || polyCount != 6 { - s.t.Logf("wrong counts, expect 6 lines 6 polies, not %d lines %d polies", lineCount, polyCount) - return false - } else { - return true - } - }) + polyCount, err := s.bqHelper.countRowsWithDataset(s.bqHelper.Config.DatasetId, dstTableName, "`polyPoly`") + if err != nil { + return false + } - env.CancelWorkflow() - }() + if lineCount != 6 || polyCount != 6 { + s.t.Logf("wrong counts, expect 6 lines 6 polies, not %d lines %d polies", lineCount, polyCount) + return false + } else { + return true + } + }) - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) + env.Cancel() e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuiteBQ) Test_Multi_Table_BQ() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) srcTable1Name := s.attachSchemaSuffix("test1_bq") dstTable1Name := "test1_bq" @@ -789,41 +761,38 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Multi_Table_BQ() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and execute a transaction touching toast columns - go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - /* inserting across multiple tables*/ - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + /* inserting across multiple tables*/ + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s (c1,c2) VALUES (1,'dummy_1'); INSERT INTO %s (c1,c2) VALUES (-1,'dummy_-1'); `, srcTable1Name, srcTable2Name)) - e2e.EnvNoError(s.t, env, err) - s.t.Log("Executed an insert on two tables") - - e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize both tables", func() bool { - count1, err := 
s.bqHelper.countRows(dstTable1Name) - if err != nil { - return false - } - count2, err := s.bqHelper.countRows(dstTable2Name) - if err != nil { - return false - } + e2e.EnvNoError(s.t, env, err) + s.t.Log("Executed an insert on two tables") - return count1 == 1 && count2 == 1 - }) + e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize both tables", func() bool { + count1, err := s.bqHelper.countRows(dstTable1Name) + if err != nil { + return false + } + count2, err := s.bqHelper.countRows(dstTable2Name) + if err != nil { + return false + } - env.CancelWorkflow() - }() + return count1 == 1 && count2 == 1 + }) - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) + env.Cancel() e2e.RequireEnvCanceled(s.t, env) } // TODO: not checking schema exactly, add later func (s PeerFlowE2ETestSuiteBQ) Test_Simple_Schema_Changes_BQ() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) tableName := "test_simple_schema_changes" srcTableName := s.attachSchemaSuffix(tableName) @@ -846,66 +815,63 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Simple_Schema_Changes_BQ() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and then insert and mutate schema repeatedly. - go func() { - // insert first row. - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + // insert first row. + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1) VALUES (1)`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - s.t.Log("Inserted initial row in the source table") + e2e.EnvNoError(s.t, env, err) + s.t.Log("Inserted initial row in the source table") - e2e.EnvWaitForEqualTables(env, s, "normalize insert", tableName, "id,c1") + e2e.EnvWaitForEqualTables(env, s, "normalize insert", tableName, "id,c1") - // alter source table, add column c2 and insert another row. - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + // alter source table, add column c2 and insert another row. + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` ALTER TABLE %s ADD COLUMN c2 BIGINT`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - s.t.Log("Altered source table, added column c2") - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + s.t.Log("Altered source table, added column c2") + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2) VALUES (2,2)`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - s.t.Log("Inserted row with added c2 in the source table") + e2e.EnvNoError(s.t, env, err) + s.t.Log("Inserted row with added c2 in the source table") - // verify we got our two rows, if schema did not match up it will error. - e2e.EnvWaitForEqualTables(env, s, "normalize altered row", tableName, "id,c1,c2") + // verify we got our two rows, if schema did not match up it will error. + e2e.EnvWaitForEqualTables(env, s, "normalize altered row", tableName, "id,c1,c2") - // alter source table, add column c3, drop column c2 and insert another row. - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + // alter source table, add column c3, drop column c2 and insert another row. 
+ _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` ALTER TABLE %s DROP COLUMN c2, ADD COLUMN c3 BIGINT`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - s.t.Log("Altered source table, dropped column c2 and added column c3") - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + s.t.Log("Altered source table, dropped column c2 and added column c3") + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c3) VALUES (3,3)`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - s.t.Log("Inserted row with added c3 in the source table") + e2e.EnvNoError(s.t, env, err) + s.t.Log("Inserted row with added c3 in the source table") - // verify we got our two rows, if schema did not match up it will error. - e2e.EnvWaitForEqualTables(env, s, "normalize altered row", tableName, "id,c1,c3") + // verify we got our two rows, if schema did not match up it will error. + e2e.EnvWaitForEqualTables(env, s, "normalize altered row", tableName, "id,c1,c3") - // alter source table, drop column c3 and insert another row. - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + // alter source table, drop column c3 and insert another row. + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` ALTER TABLE %s DROP COLUMN c3`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - s.t.Log("Altered source table, dropped column c3") - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + s.t.Log("Altered source table, dropped column c3") + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1) VALUES (4)`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - s.t.Log("Inserted row after dropping all columns in the source table") + e2e.EnvNoError(s.t, env, err) + s.t.Log("Inserted row after dropping all columns in the source table") - // verify we got our two rows, if schema did not match up it will error. - e2e.EnvWaitForEqualTables(env, s, "normalize drop column", tableName, "id,c1") + // verify we got our two rows, if schema did not match up it will error. + e2e.EnvWaitForEqualTables(env, s, "normalize drop column", tableName, "id,c1") - env.CancelWorkflow() - }() - - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) + env.Cancel() e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_BQ() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) tableName := "test_simple_cpkey" srcTableName := s.attachSchemaSuffix("test_simple_cpkey") @@ -931,40 +897,37 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_BQ() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. 
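+	// NOTE: ExecutePeerflow drives a live workflow, so the steps below run inline instead of in a goroutine.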
- go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - // insert 10 rows into the source table - for i := range 10 { - testValue := fmt.Sprintf("test_value_%d", i) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + // insert 10 rows into the source table + for i := range 10 { + testValue := fmt.Sprintf("test_value_%d", i) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c2,t) VALUES ($1,$2) `, srcTableName), i, testValue) - e2e.EnvNoError(s.t, env, err) - } - s.t.Log("Inserted 10 rows into the source table") - - // verify we got our 10 rows - e2e.EnvWaitForEqualTables(env, s, "normalize table", tableName, "id,c1,c2,t") - - _, err := s.Conn().Exec(context.Background(), - fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) - e2e.EnvNoError(s.t, env, err) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) e2e.EnvNoError(s.t, env, err) + } + s.t.Log("Inserted 10 rows into the source table") - e2e.EnvWaitForEqualTables(env, s, "normalize update", tableName, "id,c1,c2,t") + // verify we got our 10 rows + e2e.EnvWaitForEqualTables(env, s, "normalize table", tableName, "id,c1,c2,t") - env.CancelWorkflow() - }() + _, err = s.Conn().Exec(context.Background(), + fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) + e2e.EnvNoError(s.t, env, err) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) + e2e.EnvNoError(s.t, env, err) - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.EnvWaitForEqualTables(env, s, "normalize update", tableName, "id,c1,c2,t") + + env.Cancel() e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_Toast_1_BQ() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) srcTableName := s.attachSchemaSuffix("test_cpkey_toast1") dstTableName := "test_cpkey_toast1" @@ -991,41 +954,39 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_Toast_1_BQ() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. 
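+	// the insert/update/delete below share one transaction, so they should sync to the destination as a unit.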
- go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - rowsTx, err := s.Conn().Begin(context.Background()) - e2e.EnvNoError(s.t, env, err) - - // insert 10 rows into the source table - for i := range 10 { - testValue := fmt.Sprintf("test_value_%d", i) - _, err = rowsTx.Exec(context.Background(), fmt.Sprintf(` + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + rowsTx, err := s.Conn().Begin(context.Background()) + e2e.EnvNoError(s.t, env, err) + + // insert 10 rows into the source table + for i := range 10 { + testValue := fmt.Sprintf("test_value_%d", i) + _, err = rowsTx.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c2,t,t2) VALUES ($1,$2,random_string(9000)) `, srcTableName), i, testValue) - e2e.EnvNoError(s.t, env, err) - } - s.t.Log("Inserted 10 rows into the source table") - - _, err = rowsTx.Exec(context.Background(), - fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) - e2e.EnvNoError(s.t, env, err) - _, err = rowsTx.Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) e2e.EnvNoError(s.t, env, err) + } + s.t.Log("Inserted 10 rows into the source table") - err = rowsTx.Commit(context.Background()) - e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTables(env, s, "normalize tx", dstTableName, "id,c1,c2,t,t2") - env.CancelWorkflow() - }() + _, err = rowsTx.Exec(context.Background(), + fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) + e2e.EnvNoError(s.t, env, err) + _, err = rowsTx.Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) + e2e.EnvNoError(s.t, env, err) + + err = rowsTx.Commit(context.Background()) + e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitForEqualTables(env, s, "normalize tx", dstTableName, "id,c1,c2,t,t2") - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) + env.Cancel() e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_Toast_2_BQ() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) tableName := "test_cpkey_toast2" srcTableName := s.attachSchemaSuffix("test_cpkey_toast2") @@ -1052,38 +1013,35 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_Toast_2_BQ() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. 
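+	// no wrapping transaction here: the inserts are normalized first, then the update/delete arrive separately.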
- go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - // insert 10 rows into the source table - for i := range 10 { - testValue := fmt.Sprintf("test_value_%d", i) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + // insert 10 rows into the source table + for i := range 10 { + testValue := fmt.Sprintf("test_value_%d", i) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c2,t,t2) VALUES ($1,$2,random_string(9000)) `, srcTableName), i, testValue) - e2e.EnvNoError(s.t, env, err) - } - s.t.Log("Inserted 10 rows into the source table") - - e2e.EnvWaitForEqualTables(env, s, "normalize table", tableName, "id,c2,t,t2") - _, err = s.Conn().Exec(context.Background(), - fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) - e2e.EnvNoError(s.t, env, err) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTables(env, s, "normalize update", tableName, "id,c2,t,t2") + } + s.t.Log("Inserted 10 rows into the source table") - env.CancelWorkflow() - }() + e2e.EnvWaitForEqualTables(env, s, "normalize table", tableName, "id,c2,t,t2") + _, err = s.Conn().Exec(context.Background(), + fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) + e2e.EnvNoError(s.t, env, err) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) + e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitForEqualTables(env, s, "normalize update", tableName, "id,c2,t,t2") - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) + env.Cancel() e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuiteBQ) Test_Columns_BQ() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) srcTableName := s.attachSchemaSuffix("test_peerdb_cols") dstTableName := "test_peerdb_cols_dst" @@ -1106,34 +1064,31 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Columns_BQ() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - // insert 1 row into the source table - testKey := fmt.Sprintf("test_key_%d", 1) - testValue := fmt.Sprintf("test_value_%d", 1) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + // insert 1 row into the source table + testKey := fmt.Sprintf("test_key_%d", 1) + testValue := fmt.Sprintf("test_value_%d", 1) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(key, value) VALUES ($1, $2) `, srcTableName), testKey, testValue) - e2e.EnvNoError(s.t, env, err) + e2e.EnvNoError(s.t, env, err) - // delete that row - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + // delete that row + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1 `, srcTableName)) - e2e.EnvNoError(s.t, env, err) - - e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize insert/delete", func() bool { - return s.checkPeerdbColumns(dstTableName, true) == nil - }) - env.CancelWorkflow() - }() + e2e.EnvNoError(s.t, env, err) - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.EnvWaitFor(s.t, env, 3*time.Minute, 
"normalize insert/delete", func() bool { + return s.checkPeerdbColumns(dstTableName, true) == nil + }) + env.Cancel() e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuiteBQ) Test_Multi_Table_Multi_Dataset_BQ() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) srcTable1Name := s.attachSchemaSuffix("test1_bq") dstTable1Name := "test1_bq" @@ -1160,41 +1115,39 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Multi_Table_Multi_Dataset_BQ() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and execute a transaction touching toast columns - go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - /* inserting across multiple tables*/ - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + /* inserting across multiple tables*/ + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s (c1,c2) VALUES (1,'dummy_1'); INSERT INTO %s (c1,c2) VALUES (-1,'dummy_-1'); `, srcTable1Name, srcTable2Name)) - e2e.EnvNoError(s.t, env, err) - s.t.Log("Executed an insert on two tables") + e2e.EnvNoError(s.t, env, err) + s.t.Log("Executed an insert on two tables") - e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize multi dataset", func() bool { - count1, err := s.bqHelper.countRows(dstTable1Name) - if err != nil { - return false - } - count2, err := s.bqHelper.countRowsWithDataset(secondDataset, dstTable2Name, "") - if err != nil { - return false - } + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize multi dataset", func() bool { + count1, err := s.bqHelper.countRows(dstTable1Name) + if err != nil { + return false + } + count2, err := s.bqHelper.countRowsWithDataset(secondDataset, dstTable2Name, "") + if err != nil { + return false + } - return count1 == 1 && count2 == 1 - }) - env.CancelWorkflow() - }() + return count1 == 1 && count2 == 1 + }) - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) + env.Cancel() e2e.RequireEnvCanceled(s.t, env) require.NoError(s.t, s.bqHelper.DropDataset(secondDataset)) } func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Basic() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) tableName := "test_softdel" srcName := "test_softdel_src" @@ -1231,38 +1184,35 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Basic() { MaxBatchSize: 100, } - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. 
- go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, config, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize insert", srcName, tableName, "id,c1,c2,t") - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize insert", srcName, tableName, "id,c1,c2,t") + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` UPDATE %s SET c1=c1+4 WHERE id=1`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize update", srcName, tableName, "id,c1,c2,t") - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize update", srcName, tableName, "id,c1,c2,t") + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize delete", func() bool { - pgRows, err := e2e.GetPgRows(s.conn, s.bqSuffix, srcName, "id,c1,c2,t") - if err != nil { - return false - } - rows, err := s.GetRowsWhere(tableName, "id,c1,c2,t", "NOT _PEERDB_IS_DELETED") - if err != nil { - return false - } - return e2eshared.CheckEqualRecordBatches(s.t, pgRows, rows) - }) - - env.CancelWorkflow() - }() + e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize delete", func() bool { + pgRows, err := e2e.GetPgRows(s.conn, s.bqSuffix, srcName, "id,c1,c2,t") + if err != nil { + return false + } + rows, err := s.GetRowsWhere(tableName, "id,c1,c2,t", "NOT _PEERDB_IS_DELETED") + if err != nil { + return false + } + return e2eshared.CheckEqualRecordBatches(s.t, pgRows, rows) + }) - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, config, nil) + env.Cancel() e2e.RequireEnvCanceled(s.t, env) newerSyncedAtQuery := fmt.Sprintf( @@ -1274,7 +1224,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Basic() { } func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_IUD_Same_Batch() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) cmpTableName := s.attachSchemaSuffix("test_softdel_iud") srcTableName := cmpTableName + "_src" @@ -1311,48 +1261,45 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_IUD_Same_Batch() { MaxBatchSize: 100, } - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. 
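+	// insert, update and delete of one row land in the same batch, leaving a single soft-deleted row downstream.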
- go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, config, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - insertTx, err := s.Conn().Begin(context.Background()) - e2e.EnvNoError(s.t, env, err) + insertTx, err := s.Conn().Begin(context.Background()) + e2e.EnvNoError(s.t, env, err) - _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` + _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` UPDATE %s SET c1=c1+4 WHERE id=1`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - // since we delete stuff, create another table to compare with - _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + // since we delete stuff, create another table to compare with + _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` CREATE TABLE %s AS SELECT * FROM %s`, cmpTableName, srcTableName)) - e2e.EnvNoError(s.t, env, err) - _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) + e2e.EnvNoError(s.t, env, err) + + e2e.EnvNoError(s.t, env, insertTx.Commit(context.Background())) + e2e.EnvWaitForEqualTables(env, s, "normalizing tx", "test_softdel_iud", "id,c1,c2,t") + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "checking soft delete", func() bool { + newerSyncedAtQuery := fmt.Sprintf( + "SELECT COUNT(*) FROM `%s.%s` WHERE _PEERDB_IS_DELETED", + s.bqHelper.Config.DatasetId, dstTableName) + numNewRows, err := s.bqHelper.RunInt64Query(newerSyncedAtQuery) e2e.EnvNoError(s.t, env, err) + return numNewRows == 1 + }) - e2e.EnvNoError(s.t, env, insertTx.Commit(context.Background())) - e2e.EnvWaitForEqualTables(env, s, "normalizing tx", "test_softdel_iud", "id,c1,c2,t") - e2e.EnvWaitFor(s.t, env, 3*time.Minute, "checking soft delete", func() bool { - newerSyncedAtQuery := fmt.Sprintf( - "SELECT COUNT(*) FROM `%s.%s` WHERE _PEERDB_IS_DELETED", - s.bqHelper.Config.DatasetId, dstTableName) - numNewRows, err := s.bqHelper.RunInt64Query(newerSyncedAtQuery) - e2e.EnvNoError(s.t, env, err) - return numNewRows == 1 - }) - - env.CancelWorkflow() - }() - - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, config, nil) + env.Cancel() e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_UD_Same_Batch() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) srcName := "test_softdel_ud_src" srcTableName := s.attachSchemaSuffix(srcName) @@ -1389,43 +1336,40 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_UD_Same_Batch() { MaxBatchSize: 100, } - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. 
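+	// the update and delete share a transaction; the destination should keep the row only as a soft delete.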
- go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, config, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize insert", srcName, dstName, "id,c1,c2,t") + e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize insert", srcName, dstName, "id,c1,c2,t") - insertTx, err := s.Conn().Begin(context.Background()) - e2e.EnvNoError(s.t, env, err) - _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` + insertTx, err := s.Conn().Begin(context.Background()) + e2e.EnvNoError(s.t, env, err) + _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` UPDATE %s SET t=random_string(10000) WHERE id=1`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` UPDATE %s SET c1=c1+4 WHERE id=1`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - e2e.EnvNoError(s.t, env, insertTx.Commit(context.Background())) - - e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize transaction", func() bool { - pgRows, err := e2e.GetPgRows(s.conn, s.bqSuffix, srcName, "id,c1,c2,t") - e2e.EnvNoError(s.t, env, err) - rows, err := s.GetRowsWhere(dstName, "id,c1,c2,t", "NOT _PEERDB_IS_DELETED") - if err != nil { - return false - } - return e2eshared.CheckEqualRecordBatches(s.t, pgRows, rows) - }) + e2e.EnvNoError(s.t, env, err) + e2e.EnvNoError(s.t, env, insertTx.Commit(context.Background())) - env.CancelWorkflow() - }() + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize transaction", func() bool { + pgRows, err := e2e.GetPgRows(s.conn, s.bqSuffix, srcName, "id,c1,c2,t") + e2e.EnvNoError(s.t, env, err) + rows, err := s.GetRowsWhere(dstName, "id,c1,c2,t", "NOT _PEERDB_IS_DELETED") + if err != nil { + return false + } + return e2eshared.CheckEqualRecordBatches(s.t, pgRows, rows) + }) - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, config, nil) + env.Cancel() e2e.RequireEnvCanceled(s.t, env) newerSyncedAtQuery := fmt.Sprintf( @@ -1437,7 +1381,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_UD_Same_Batch() { } func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Insert_After_Delete() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) tableName := "test_softdel_iad" srcTableName := s.attachSchemaSuffix(tableName) @@ -1473,38 +1417,35 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Insert_After_Delete() { MaxBatchSize: 100, } - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and then insert and delete rows in the table. 
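+	// reinserting id=1 after its delete should clear the soft-delete flag on the destination row again.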
- go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, config, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTables(env, s, "normalize insert", tableName, "id,c1,c2,t") - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitForEqualTables(env, s, "normalize insert", tableName, "id,c1,c2,t") + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize delete", func() bool { - pgRows, err := e2e.GetPgRows(s.conn, s.bqSuffix, tableName, "id,c1,c2,t") - if err != nil { - return false - } - rows, err := s.GetRowsWhere(tableName, "id,c1,c2,t", "NOT _PEERDB_IS_DELETED") - if err != nil { - return false - } - return e2eshared.CheckEqualRecordBatches(s.t, pgRows, rows) - }) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize delete", func() bool { + pgRows, err := e2e.GetPgRows(s.conn, s.bqSuffix, tableName, "id,c1,c2,t") + if err != nil { + return false + } + rows, err := s.GetRowsWhere(tableName, "id,c1,c2,t", "NOT _PEERDB_IS_DELETED") + if err != nil { + return false + } + return e2eshared.CheckEqualRecordBatches(s.t, pgRows, rows) + }) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(id,c1,c2,t) VALUES (1,3,4,random_string(10000))`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTables(env, s, "normalize reinsert", tableName, "id,c1,c2,t") - - env.CancelWorkflow() - }() + e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitForEqualTables(env, s, "normalize reinsert", tableName, "id,c1,c2,t") - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, config, nil) + env.Cancel() e2e.RequireEnvCanceled(s.t, env) newerSyncedAtQuery := fmt.Sprintf( @@ -1516,8 +1457,6 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Insert_After_Delete() { } func (s PeerFlowE2ETestSuiteBQ) Test_JSON_PKey_BQ() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) - srcTableName := s.attachSchemaSuffix("test_json_pkey_bq") dstTableName := "test_json_pkey_bq" @@ -1546,24 +1485,24 @@ func (s PeerFlowE2ETestSuiteBQ) Test_JSON_PKey_BQ() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - // insert 10 rows into the source table - for i := range 10 { - testKey := fmt.Sprintf("test_key_%d", i) - testValue := fmt.Sprintf("test_value_%d", i) - testJson := `'{"name":"jack", "age":12, "spouse":null}'::json` - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` - INSERT INTO %s(key, value, j) VALUES ($1, $2, %s) - `, srcTableName, testJson), testKey, testValue) - e2e.EnvNoError(s.t, env, err) - } - s.t.Log("Inserted 10 rows into the source table") + tc := e2e.NewTemporalClient(s.t) + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + // insert 10 rows into the source table + for i := range 10 { + testKey := fmt.Sprintf("test_key_%d", i) + testValue := fmt.Sprintf("test_value_%d", i) + testJson 
:= `'{"name":"jack", "age":12, "spouse":null}'::json` + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + INSERT INTO %s(key, value, j) VALUES ($1, $2, %s) + `, srcTableName, testJson), testKey, testValue) + e2e.EnvNoError(s.t, env, err) + } + s.t.Log("Inserted 10 rows into the source table") - e2e.EnvWaitForEqualTables(env, s, "normalize inserts", dstTableName, "id,key,value,j") - env.CancelWorkflow() - }() + e2e.EnvWaitForEqualTables(env, s, "normalize inserts", dstTableName, "id,key,value,j") - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) + env.Cancel() e2e.RequireEnvCanceled(s.t, env) } diff --git a/flow/e2e/bigquery/qrep_flow_bq_test.go b/flow/e2e/bigquery/qrep_flow_bq_test.go index 4fdfb52e95..c7f6a5c7f8 100644 --- a/flow/e2e/bigquery/qrep_flow_bq_test.go +++ b/flow/e2e/bigquery/qrep_flow_bq_test.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "strings" + "time" "github.com/stretchr/testify/require" @@ -57,7 +58,7 @@ func (s PeerFlowE2ETestSuiteBQ) setupTimeTable(tableName string) { } func (s PeerFlowE2ETestSuiteBQ) Test_Complete_QRep_Flow_Avro() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) numRows := 10 @@ -76,20 +77,15 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Complete_QRep_Flow_Avro() { true, "") require.NoError(s.t, err) - e2e.RunQrepFlowWorkflow(env, qrepConfig) - - // Verify workflow completes without error - require.True(s.t, env.IsWorkflowCompleted()) - - err = env.GetWorkflowError() - require.NoError(s.t, err) + env := e2e.RunQrepFlowWorkflow(tc, qrepConfig) + e2e.EnvWaitForFinished(s.t, env, 3*time.Minute) + require.NoError(s.t, env.Error()) e2e.RequireEqualTables(s, tblName, "*") } func (s PeerFlowE2ETestSuiteBQ) Test_Invalid_Timestamps_And_Date_QRep() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) - + tc := e2e.NewTemporalClient(s.t) tblName := "test_invalid_time_bq" s.setupTimeTable(tblName) @@ -106,13 +102,9 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Invalid_Timestamps_And_Date_QRep() { "") qrepConfig.WatermarkColumn = "watermark_ts" require.NoError(s.t, err) - e2e.RunQrepFlowWorkflow(env, qrepConfig) - - // Verify workflow completes without error - require.True(s.t, env.IsWorkflowCompleted()) - - err = env.GetWorkflowError() - require.NoError(s.t, err) + env := e2e.RunQrepFlowWorkflow(tc, qrepConfig) + e2e.EnvWaitForFinished(s.t, env, 3*time.Minute) + require.NoError(s.t, env.Error()) goodValues := []string{"watermark_ts", "mydate", "medieval"} badValues := []string{"mytimestamp", "mytztimestamp", "mybaddate"} @@ -131,7 +123,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Invalid_Timestamps_And_Date_QRep() { } func (s PeerFlowE2ETestSuiteBQ) Test_PeerDB_Columns_QRep_BQ() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) numRows := 10 @@ -150,13 +142,9 @@ func (s PeerFlowE2ETestSuiteBQ) Test_PeerDB_Columns_QRep_BQ() { true, "_PEERDB_SYNCED_AT") require.NoError(s.t, err) - e2e.RunQrepFlowWorkflow(env, qrepConfig) - - // Verify workflow completes without error - require.True(s.t, env.IsWorkflowCompleted()) - - err = env.GetWorkflowError() - require.NoError(s.t, err) + env := e2e.RunQrepFlowWorkflow(tc, qrepConfig) + e2e.EnvWaitForFinished(s.t, env, 3*time.Minute) + require.NoError(s.t, env.Error()) err = s.checkPeerdbColumns(tblName, false) require.NoError(s.t, err) diff --git a/flow/e2e/postgres/peer_flow_pg_test.go b/flow/e2e/postgres/peer_flow_pg_test.go index 0ec31befe4..192ee85ba9 100644 --- a/flow/e2e/postgres/peer_flow_pg_test.go +++ 
b/flow/e2e/postgres/peer_flow_pg_test.go @@ -12,9 +12,6 @@ import ( "github.com/jackc/pgx/v5/pgtype" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "go.temporal.io/sdk/testsuite" - "go.temporal.io/sdk/worker" - "go.temporal.io/sdk/workflow" "github.com/PeerDB-io/peer-flow/connectors/utils" "github.com/PeerDB-io/peer-flow/e2e" @@ -55,7 +52,7 @@ func (s PeerFlowE2ETestSuitePG) checkPeerdbColumns(dstSchemaQualified string, ro } func (s PeerFlowE2ETestSuitePG) WaitForSchema( - env *testsuite.TestWorkflowEnvironment, + env e2e.WorkflowRun, reason string, srcTableName string, dstTableName string, @@ -81,8 +78,6 @@ func (s PeerFlowE2ETestSuitePG) WaitForSchema( } func (s PeerFlowE2ETestSuitePG) Test_Simple_Flow_PG() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) - srcTableName := s.attachSchemaSuffix("test_simple_flow") dstTableName := s.attachSchemaSuffix("test_simple_flow_dst") @@ -105,34 +100,30 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Flow_PG() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup - // and then insert 10 rows into the source table - go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - // insert 10 rows into the source table - for i := range 10 { - testKey := fmt.Sprintf("test_key_%d", i) - testValue := fmt.Sprintf("test_value_%d", i) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` - INSERT INTO %s(key, value, myh) VALUES ($1, $2, '"a"=>"b"') - `, srcTableName), testKey, testValue) - e2e.EnvNoError(s.t, env, err) - } - s.t.Log("Inserted 10 rows into the source table") + tc := e2e.NewTemporalClient(s.t) + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) - e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize 10 rows", func() bool { - return s.comparePGTables(srcTableName, dstTableName, "id,key,value") == nil - }) - env.CancelWorkflow() - }() + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + // insert 10 rows into the source table + for i := range 10 { + testKey := fmt.Sprintf("test_key_%d", i) + testValue := fmt.Sprintf("test_value_%d", i) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + INSERT INTO %s(key, value, myh) VALUES ($1, $2, '"a"=>"b"') + `, srcTableName), testKey, testValue) + e2e.EnvNoError(s.t, env, err) + } + s.t.Log("Inserted 10 rows into the source table") + + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize 10 rows", func() bool { + return s.comparePGTables(srcTableName, dstTableName, "id,key,value") == nil + }) + env.Cancel() - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuitePG) Test_Geospatial_PG() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) - srcTableName := s.attachSchemaSuffix("test_geospatial_pg") dstTableName := s.attachSchemaSuffix("test_geospatial_pg_dst") @@ -154,27 +145,27 @@ func (s PeerFlowE2ETestSuitePG) Test_Geospatial_PG() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - // insert 1 row into the source table - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` - INSERT INTO %s(gg, gm) VALUES ('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))','LINESTRING(0 0, 1 1, 2 2)') - `, srcTableName)) - e2e.EnvNoError(s.t, env, err) + tc := e2e.NewTemporalClient(s.t) + env := e2e.ExecutePeerflow(tc, 
peerflow.CDCFlowWorkflow, flowConnConfig, nil) - s.t.Log("Inserted 1 row into the source table") - e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize shapes", func() bool { - return s.comparePGTables(srcTableName, dstTableName, "id,gg,gm") == nil - }) - env.CancelWorkflow() - }() + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + // insert 1 row into the source table + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + INSERT INTO %s(gg, gm) VALUES ('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))','LINESTRING(0 0, 1 1, 2 2)') + `, srcTableName)) + e2e.EnvNoError(s.t, env, err) + + s.t.Log("Inserted 1 row into the source table") + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize shapes", func() bool { + return s.comparePGTables(srcTableName, dstTableName, "id,gg,gm") == nil + }) - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) + env.Cancel() e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuitePG) Test_Types_PG() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) srcTableName := s.attachSchemaSuffix("test_types_pg") dstTableName := s.attachSchemaSuffix("test_types_pg_dst") @@ -199,9 +190,9 @@ func (s PeerFlowE2ETestSuitePG) Test_Types_PG() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s SELECT 2,2,b'1', true,'s','test','1.1.10.2'::cidr, CURRENT_DATE,1.23,1.234,'192.168.1.5'::inet,1, @@ -217,27 +208,25 @@ func (s PeerFlowE2ETestSuitePG) Test_Types_PG() { '{true, false}'::boolean[], '{1,2}'::smallint[]; `, srcTableName)) - e2e.EnvNoError(s.t, env, err) - - s.t.Log("Inserted 1 row into the source table") - allCols := strings.Join([]string{ - "c1", "c2", "c4", - "c40", "id", "c9", "c11", "c12", "c13", "c14", "c15", - "c21", "c29", "c33", "c34", "c35", "c36", - "c7", "c8", "c32", "c42", "c43", "c44", "c46", "c47", "c48", "c49", "c50", - }, ",") - e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize types", func() bool { - return s.comparePGTables(srcTableName, dstTableName, allCols) == nil - }) - env.CancelWorkflow() - }() + e2e.EnvNoError(s.t, env, err) + + s.t.Log("Inserted 1 row into the source table") + allCols := strings.Join([]string{ + "c1", "c2", "c4", + "c40", "id", "c9", "c11", "c12", "c13", "c14", "c15", + "c21", "c29", "c33", "c34", "c35", "c36", + "c7", "c8", "c32", "c42", "c43", "c44", "c46", "c47", "c48", "c49", "c50", + }, ",") + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize types", func() bool { + return s.comparePGTables(srcTableName, dstTableName, allCols) == nil + }) + env.Cancel() - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuitePG) Test_Enums_PG() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) srcTableName := s.attachSchemaSuffix("test_enum_flow") dstTableName := s.attachSchemaSuffix("test_enum_flow_dst") @@ -265,26 +254,24 @@ func (s PeerFlowE2ETestSuitePG) Test_Enums_PG() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - _, err = s.Conn().Exec(context.Background(), 
fmt.Sprintf(` + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(my_mood, my_null_mood) VALUES ('happy',null) `, srcTableName)) - e2e.EnvNoError(s.t, env, err) - s.t.Log("Inserted enums into the source table") - e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize enum", func() bool { - return s.checkEnums(srcTableName, dstTableName) == nil - }) + e2e.EnvNoError(s.t, env, err) + s.t.Log("Inserted enums into the source table") + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize enum", func() bool { + return s.checkEnums(srcTableName, dstTableName) == nil + }) - env.CancelWorkflow() - }() + env.Cancel() - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuitePG) Test_Simple_Schema_Changes_PG() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) srcTableName := s.attachSchemaSuffix("test_simple_schema_changes") dstTableName := s.attachSchemaSuffix("test_simple_schema_changes_dst") @@ -306,168 +293,167 @@ func (s PeerFlowE2ETestSuitePG) Test_Simple_Schema_Changes_PG() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 1 - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and then insert and mutate schema repeatedly. - go func() { - // insert first row. - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + + // insert first row. + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1) VALUES ($1)`, srcTableName), 1) - e2e.EnvNoError(s.t, env, err) - s.t.Log("Inserted initial row in the source table") + e2e.EnvNoError(s.t, env, err) + s.t.Log("Inserted initial row in the source table") - s.WaitForSchema(env, "normalizing first row", srcTableName, dstTableName, "id,c1", &protos.TableSchema{ - TableIdentifier: dstTableName, - PrimaryKeyColumns: []string{"id"}, - Columns: []*protos.FieldDescription{ - { - Name: "id", - Type: string(qvalue.QValueKindInt64), - TypeModifier: -1, - }, - { - Name: "c1", - Type: string(qvalue.QValueKindInt64), - TypeModifier: -1, - }, - { - Name: "_PEERDB_SYNCED_AT", - Type: string(qvalue.QValueKindTimestamp), - TypeModifier: -1, - }, + s.WaitForSchema(env, "normalizing first row", srcTableName, dstTableName, "id,c1", &protos.TableSchema{ + TableIdentifier: dstTableName, + PrimaryKeyColumns: []string{"id"}, + Columns: []*protos.FieldDescription{ + { + Name: "id", + Type: string(qvalue.QValueKindInt64), + TypeModifier: -1, }, - }) + { + Name: "c1", + Type: string(qvalue.QValueKindInt64), + TypeModifier: -1, + }, + { + Name: "_PEERDB_SYNCED_AT", + Type: string(qvalue.QValueKindTimestamp), + TypeModifier: -1, + }, + }, + }) - // alter source table, add column c2 and insert another row. - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + // alter source table, add column c2 and insert another row. 
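Every DDL step in this test is checked the same way: build the complete protos.TableSchema expected on the destination and hand it to WaitForSchema together with the column list used for the data comparison. A condensed form of the first step above, with the helper signature as this patch rewrites it (e2e.WorkflowRun in place of the old *testsuite.TestWorkflowEnvironment):

	expected := &protos.TableSchema{
		TableIdentifier:   dstTableName,
		PrimaryKeyColumns: []string{"id"},
		Columns: []*protos.FieldDescription{
			{Name: "id", Type: string(qvalue.QValueKindInt64), TypeModifier: -1},
			{Name: "c1", Type: string(qvalue.QValueKindInt64), TypeModifier: -1},
			{Name: "_PEERDB_SYNCED_AT", Type: string(qvalue.QValueKindTimestamp), TypeModifier: -1},
		},
	}
	s.WaitForSchema(env, "normalizing first row", srcTableName, dstTableName, "id,c1", expected)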
+ _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` ALTER TABLE %s ADD COLUMN c2 BIGINT`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - s.t.Log("Altered source table, added column c2") - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + s.t.Log("Altered source table, added column c2") + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2) VALUES ($1,$2)`, srcTableName), 2, 2) - e2e.EnvNoError(s.t, env, err) - s.t.Log("Inserted row with added c2 in the source table") + e2e.EnvNoError(s.t, env, err) + s.t.Log("Inserted row with added c2 in the source table") - s.WaitForSchema(env, "normalizing altered row", srcTableName, dstTableName, "id,c1,c2", &protos.TableSchema{ - TableIdentifier: dstTableName, - PrimaryKeyColumns: []string{"id"}, - Columns: []*protos.FieldDescription{ - { - Name: "id", - Type: string(qvalue.QValueKindInt64), - TypeModifier: -1, - }, - { - Name: "c1", - Type: string(qvalue.QValueKindInt64), - TypeModifier: -1, - }, - { - Name: "_PEERDB_SYNCED_AT", - Type: string(qvalue.QValueKindTimestamp), - TypeModifier: -1, - }, - { - Name: "c2", - Type: string(qvalue.QValueKindInt64), - TypeModifier: -1, - }, + s.WaitForSchema(env, "normalizing altered row", srcTableName, dstTableName, "id,c1,c2", &protos.TableSchema{ + TableIdentifier: dstTableName, + PrimaryKeyColumns: []string{"id"}, + Columns: []*protos.FieldDescription{ + { + Name: "id", + Type: string(qvalue.QValueKindInt64), + TypeModifier: -1, }, - }) + { + Name: "c1", + Type: string(qvalue.QValueKindInt64), + TypeModifier: -1, + }, + { + Name: "_PEERDB_SYNCED_AT", + Type: string(qvalue.QValueKindTimestamp), + TypeModifier: -1, + }, + { + Name: "c2", + Type: string(qvalue.QValueKindInt64), + TypeModifier: -1, + }, + }, + }) - // alter source table, add column c3, drop column c2 and insert another row. - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + // alter source table, add column c3, drop column c2 and insert another row. 
+ _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` ALTER TABLE %s DROP COLUMN c2, ADD COLUMN c3 BIGINT`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - s.t.Log("Altered source table, dropped column c2 and added column c3") - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + s.t.Log("Altered source table, dropped column c2 and added column c3") + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c3) VALUES ($1,$2)`, srcTableName), 3, 3) - e2e.EnvNoError(s.t, env, err) - s.t.Log("Inserted row with added c3 in the source table") + e2e.EnvNoError(s.t, env, err) + s.t.Log("Inserted row with added c3 in the source table") - s.WaitForSchema(env, "normalizing dropped column row", srcTableName, dstTableName, "id,c1,c3", &protos.TableSchema{ - TableIdentifier: dstTableName, - PrimaryKeyColumns: []string{"id"}, - Columns: []*protos.FieldDescription{ - { - Name: "id", - Type: string(qvalue.QValueKindInt64), - TypeModifier: -1, - }, - { - Name: "c1", - Type: string(qvalue.QValueKindInt64), - TypeModifier: -1, - }, - { - Name: "c2", - Type: string(qvalue.QValueKindInt64), - TypeModifier: -1, - }, - { - Name: "_PEERDB_SYNCED_AT", - Type: string(qvalue.QValueKindTimestamp), - TypeModifier: -1, - }, - { - Name: "c3", - Type: string(qvalue.QValueKindInt64), - TypeModifier: -1, - }, + s.WaitForSchema(env, "normalizing dropped column row", srcTableName, dstTableName, "id,c1,c3", &protos.TableSchema{ + TableIdentifier: dstTableName, + PrimaryKeyColumns: []string{"id"}, + Columns: []*protos.FieldDescription{ + { + Name: "id", + Type: string(qvalue.QValueKindInt64), + TypeModifier: -1, }, - }) + { + Name: "c1", + Type: string(qvalue.QValueKindInt64), + TypeModifier: -1, + }, + { + Name: "c2", + Type: string(qvalue.QValueKindInt64), + TypeModifier: -1, + }, + { + Name: "_PEERDB_SYNCED_AT", + Type: string(qvalue.QValueKindTimestamp), + TypeModifier: -1, + }, + { + Name: "c3", + Type: string(qvalue.QValueKindInt64), + TypeModifier: -1, + }, + }, + }) - // alter source table, drop column c3 and insert another row. - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + // alter source table, drop column c3 and insert another row. 
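Note what the expectations in these steps encode: a column dropped at the source (c2 above, c3 below) is not dropped from the destination, so every expected schema after the DROP keeps listing it. As these assertions show, only additions propagate, and the expected Columns slice only ever grows:

	// expected destination columns after ADD c2, DROP c2, ADD c3, DROP c3 upstream
	columns := []*protos.FieldDescription{
		{Name: "id", Type: string(qvalue.QValueKindInt64), TypeModifier: -1},
		{Name: "c1", Type: string(qvalue.QValueKindInt64), TypeModifier: -1},
		{Name: "_PEERDB_SYNCED_AT", Type: string(qvalue.QValueKindTimestamp), TypeModifier: -1},
		{Name: "c2", Type: string(qvalue.QValueKindInt64), TypeModifier: -1}, // dropped upstream, kept here
		{Name: "c3", Type: string(qvalue.QValueKindInt64), TypeModifier: -1}, // dropped upstream, kept here
	}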
+ _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` ALTER TABLE %s DROP COLUMN c3`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - s.t.Log("Altered source table, dropped column c3") - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + s.t.Log("Altered source table, dropped column c3") + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1) VALUES ($1)`, srcTableName), 4) - e2e.EnvNoError(s.t, env, err) - s.t.Log("Inserted row after dropping all columns in the source table") + e2e.EnvNoError(s.t, env, err) + s.t.Log("Inserted row after dropping all columns in the source table") - s.WaitForSchema(env, "normalizing 2nd dropped column row", srcTableName, dstTableName, "id,c1", &protos.TableSchema{ - TableIdentifier: dstTableName, - PrimaryKeyColumns: []string{"id"}, - Columns: []*protos.FieldDescription{ - { - Name: "id", - Type: string(qvalue.QValueKindInt64), - TypeModifier: -1, - }, - { - Name: "c1", - Type: string(qvalue.QValueKindInt64), - TypeModifier: -1, - }, - { - Name: "_PEERDB_SYNCED_AT", - Type: string(qvalue.QValueKindTimestamp), - TypeModifier: -1, - }, - { - Name: "c2", - Type: string(qvalue.QValueKindInt64), - TypeModifier: -1, - }, - { - Name: "c3", - Type: string(qvalue.QValueKindInt64), - TypeModifier: -1, - }, + s.WaitForSchema(env, "normalizing 2nd dropped column row", srcTableName, dstTableName, "id,c1", &protos.TableSchema{ + TableIdentifier: dstTableName, + PrimaryKeyColumns: []string{"id"}, + Columns: []*protos.FieldDescription{ + { + Name: "id", + Type: string(qvalue.QValueKindInt64), + TypeModifier: -1, }, - }) + { + Name: "c1", + Type: string(qvalue.QValueKindInt64), + TypeModifier: -1, + }, + { + Name: "_PEERDB_SYNCED_AT", + Type: string(qvalue.QValueKindTimestamp), + TypeModifier: -1, + }, + { + Name: "c2", + Type: string(qvalue.QValueKindInt64), + TypeModifier: -1, + }, + { + Name: "c3", + Type: string(qvalue.QValueKindInt64), + TypeModifier: -1, + }, + }, + }) - env.CancelWorkflow() - }() + env.Cancel() - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_PG() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) srcTableName := s.attachSchemaSuffix("test_simple_cpkey") dstTableName := s.attachSchemaSuffix("test_simple_cpkey_dst") @@ -492,41 +478,39 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_PG() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. 
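The hunk below repeats the mechanical transformation applied to every CDC test in this patch: instead of a goroutine racing env.ExecuteWorkflow on a TestWorkflowEnvironment, the body runs inline against a workflow started on a real Temporal server. The resulting skeleton, using the helper names this patch introduces:

	tc := e2e.NewTemporalClient(s.t)
	env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil)
	e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen)
	// ... drive the source database, assert with e2e.EnvWaitFor ...
	env.Cancel()
	e2e.RequireEnvCanceled(s.t, env)

env.Cancel() replaces env.CancelWorkflow(), and RequireEnvCanceled then checks that the run terminated with a cancellation rather than a failure.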
- go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - // insert 10 rows into the source table - for i := range 10 { - testValue := fmt.Sprintf("test_value_%d", i) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + // insert 10 rows into the source table + for i := range 10 { + testValue := fmt.Sprintf("test_value_%d", i) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c2,t) VALUES ($1,$2) `, srcTableName), i, testValue) - e2e.EnvNoError(s.t, env, err) - } - s.t.Log("Inserted 10 rows into the source table") + e2e.EnvNoError(s.t, env, err) + } + s.t.Log("Inserted 10 rows into the source table") - e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize 10 rows", func() bool { - return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil - }) + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize 10 rows", func() bool { + return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil + }) - _, err := s.Conn().Exec(context.Background(), - fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) - e2e.EnvNoError(s.t, env, err) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) - e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize modifications", func() bool { - return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil - }) - env.CancelWorkflow() - }() + _, err = s.Conn().Exec(context.Background(), + fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) + e2e.EnvNoError(s.t, env, err) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) + e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize modifications", func() bool { + return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil + }) + env.Cancel() - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_Toast_1_PG() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) srcTableName := s.attachSchemaSuffix("test_cpkey_toast1") randomString := s.attachSchemaSuffix("random_string") @@ -556,44 +540,42 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_Toast_1_PG() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. 
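Because nothing fast-forwards time against a real server, each assertion above and below is an EnvWaitFor call: a polling predicate with a wall-clock deadline. The actual helper lives elsewhere in the e2e package; a hypothetical sketch of the shape such a poller needs (the function name, failure mode, and one-second interval are assumptions, not the real implementation):

	// assumes "testing" and "time" imports
	func waitFor(t *testing.T, timeout time.Duration, reason string, pred func() bool) {
		t.Helper()
		deadline := time.Now().Add(timeout)
		for !pred() {
			if time.Now().After(deadline) {
				t.Fatalf("timed out: %s", reason) // assumed failure mode
			}
			time.Sleep(time.Second) // assumed poll interval
		}
	}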
- go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - rowsTx, err := s.Conn().Begin(context.Background()) - e2e.EnvNoError(s.t, env, err) - - // insert 10 rows into the source table - for i := range 10 { - testValue := fmt.Sprintf("test_value_%d", i) - _, err = rowsTx.Exec(context.Background(), fmt.Sprintf(` + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + rowsTx, err := s.Conn().Begin(context.Background()) + e2e.EnvNoError(s.t, env, err) + + // insert 10 rows into the source table + for i := range 10 { + testValue := fmt.Sprintf("test_value_%d", i) + _, err = rowsTx.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c2,t,t2) VALUES ($1,$2,%s(9000)) `, srcTableName, randomString), i, testValue) - e2e.EnvNoError(s.t, env, err) - } - s.t.Log("Inserted 10 rows into the source table") - - _, err = rowsTx.Exec(context.Background(), - fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) - e2e.EnvNoError(s.t, env, err) - _, err = rowsTx.Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) e2e.EnvNoError(s.t, env, err) + } + s.t.Log("Inserted 10 rows into the source table") - err = rowsTx.Commit(context.Background()) - e2e.EnvNoError(s.t, env, err) + _, err = rowsTx.Exec(context.Background(), + fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) + e2e.EnvNoError(s.t, env, err) + _, err = rowsTx.Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) + e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize tx", func() bool { - return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t,t2") == nil - }) - env.CancelWorkflow() - }() + err = rowsTx.Commit(context.Background()) + e2e.EnvNoError(s.t, env, err) + + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize tx", func() bool { + return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t,t2") == nil + }) + env.Cancel() - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_Toast_2_PG() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) srcTableName := s.attachSchemaSuffix("test_cpkey_toast2") randomString := s.attachSchemaSuffix("random_string") @@ -623,43 +605,41 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_Toast_2_PG() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. 
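The two TOAST tests differ only in commit granularity. Toast_1 above funnels the inserts, the update, and the delete through a single transaction, so logical decoding delivers them as one commit and the 9000-character TOAST values must survive an update and a delete landing in the same batch; Toast_2 below issues the same statements as separate autocommitted statements with waits in between. The transactional variant, condensed from the hunk above:

	rowsTx, err := s.Conn().Begin(context.Background())
	e2e.EnvNoError(s.t, env, err)
	_, err = rowsTx.Exec(context.Background(),
		fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1)
	e2e.EnvNoError(s.t, env, err)
	_, err = rowsTx.Exec(context.Background(),
		fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0)
	e2e.EnvNoError(s.t, env, err)
	e2e.EnvNoError(s.t, env, rowsTx.Commit(context.Background()))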
- go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - // insert 10 rows into the source table - for i := range 10 { - testValue := fmt.Sprintf("test_value_%d", i) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + // insert 10 rows into the source table + for i := range 10 { + testValue := fmt.Sprintf("test_value_%d", i) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c2,t,t2) VALUES ($1,$2,%s(9000)) `, srcTableName, randomString), i, testValue) - e2e.EnvNoError(s.t, env, err) - } - s.t.Log("Inserted 10 rows into the source table") - - e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize 10 rows", func() bool { - return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t,t2") == nil - }) - _, err = s.Conn().Exec(context.Background(), - fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) - e2e.EnvNoError(s.t, env, err) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) e2e.EnvNoError(s.t, env, err) + } + s.t.Log("Inserted 10 rows into the source table") - e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize update", func() bool { - return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t,t2") == nil - }) + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize 10 rows", func() bool { + return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t,t2") == nil + }) + _, err = s.Conn().Exec(context.Background(), + fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) + e2e.EnvNoError(s.t, env, err) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) + e2e.EnvNoError(s.t, env, err) + + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize update", func() bool { + return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t,t2") == nil + }) - env.CancelWorkflow() - }() + env.Cancel() - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuitePG) Test_PeerDB_Columns() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) srcTableName := s.attachSchemaSuffix("test_peerdb_cols") dstTableName := s.attachSchemaSuffix("test_peerdb_cols_dst") @@ -683,35 +663,33 @@ func (s PeerFlowE2ETestSuitePG) Test_PeerDB_Columns() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - // insert 1 row into the source table - testKey := fmt.Sprintf("test_key_%d", 1) - testValue := fmt.Sprintf("test_value_%d", 1) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + // insert 1 row into the source table + testKey := fmt.Sprintf("test_key_%d", 1) + testValue := fmt.Sprintf("test_value_%d", 1) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(key, value) VALUES ($1, $2) `, srcTableName), testKey, testValue) - e2e.EnvNoError(s.t, env, err) + e2e.EnvNoError(s.t, env, err) - // delete that row - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + // delete that row + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1 `, 
srcTableName)) - e2e.EnvNoError(s.t, env, err) - s.t.Log("Inserted and deleted a row for peerdb column check") + e2e.EnvNoError(s.t, env, err) + s.t.Log("Inserted and deleted a row for peerdb column check") - e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize insert/delete", func() bool { - return s.checkPeerdbColumns(dstTableName, 1) == nil - }) - env.CancelWorkflow() - }() + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize insert/delete", func() bool { + return s.checkPeerdbColumns(dstTableName, 1) == nil + }) + env.Cancel() - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_Basic() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) cmpTableName := s.attachSchemaSuffix("test_softdel") srcTableName := cmpTableName + "_src" @@ -748,39 +726,36 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_Basic() { MaxBatchSize: 100, } - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. - go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, config, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize row", func() bool { - return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil - }) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize row", func() bool { + return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil + }) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` UPDATE %s SET c1=c1+4 WHERE id=1`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize update", func() bool { - return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil - }) - // since we delete stuff, create another table to compare with - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize update", func() bool { + return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil + }) + // since we delete stuff, create another table to compare with + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` CREATE TABLE %s AS SELECT * FROM %s`, cmpTableName, srcTableName)) - e2e.EnvNoError(s.t, env, err) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) - e2e.EnvNoError(s.t, env, err) + e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize delete", func() bool { - return s.comparePGTables(srcTableName, dstTableName+` WHERE NOT "_PEERDB_IS_DELETED"`, "id,c1,c2,t") == nil - }) - - env.CancelWorkflow() - }() + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize delete", func() bool { + return s.comparePGTables(srcTableName, dstTableName+` WHERE NOT "_PEERDB_IS_DELETED"`, "id,c1,c2,t") == nil + }) - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, 
config, nil) + env.Cancel() e2e.RequireEnvCanceled(s.t, env) // verify our updates and delete happened @@ -797,7 +772,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_Basic() { } func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_IUD_Same_Batch() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) cmpTableName := s.attachSchemaSuffix("test_softdel_iud") srcTableName := cmpTableName + "_src" @@ -834,50 +809,45 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_IUD_Same_Batch() { MaxBatchSize: 100, } - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. - go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, config, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - insertTx, err := s.Conn().Begin(context.Background()) - e2e.EnvNoError(s.t, env, err) + insertTx, err := s.Conn().Begin(context.Background()) + e2e.EnvNoError(s.t, env, err) - _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` + _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` UPDATE %s SET c1=c1+4 WHERE id=1`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - // since we delete stuff, create another table to compare with - _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + // since we delete stuff, create another table to compare with + _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` CREATE TABLE %s AS SELECT * FROM %s`, cmpTableName, srcTableName)) - e2e.EnvNoError(s.t, env, err) - _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - - e2e.EnvNoError(s.t, env, insertTx.Commit(context.Background())) + e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize tx", func() bool { - return s.comparePGTables(cmpTableName, dstTableName, "id,c1,c2,t") == nil - }) + e2e.EnvNoError(s.t, env, insertTx.Commit(context.Background())) - softDeleteQuery := fmt.Sprintf(`SELECT COUNT(*) FROM %s WHERE "_PEERDB_IS_DELETED"`, dstTableName) - e2e.EnvWaitFor(s.t, env, time.Minute, "normalize soft delete", func() bool { - numRows, err := s.RunInt64Query(softDeleteQuery) - return err == nil && numRows == 1 - }) + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize tx", func() bool { + return s.comparePGTables(cmpTableName, dstTableName, "id,c1,c2,t") == nil + }) - env.CancelWorkflow() - }() + softDeleteQuery := fmt.Sprintf(`SELECT COUNT(*) FROM %s WHERE "_PEERDB_IS_DELETED"`, dstTableName) + e2e.EnvWaitFor(s.t, env, time.Minute, "normalize soft delete", func() bool { + numRows, err := s.RunInt64Query(softDeleteQuery) + return err == nil && numRows == 1 + }) - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, config, nil) + env.Cancel() e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_UD_Same_Batch() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) - cmpTableName := s.attachSchemaSuffix("test_softdel_ud") srcTableName := cmpTableName + "_src" 
dstTableName := s.attachSchemaSuffix("test_softdel_ud_dst") @@ -913,40 +883,38 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_UD_Same_Batch() { MaxBatchSize: 100, } - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. - go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + tc := e2e.NewTemporalClient(s.t) + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, config, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize row", func() bool { - return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil - }) + e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize row", func() bool { + return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil + }) - insertTx, err := s.Conn().Begin(context.Background()) - e2e.EnvNoError(s.t, env, err) - _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` + insertTx, err := s.Conn().Begin(context.Background()) + e2e.EnvNoError(s.t, env, err) + _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` UPDATE %s SET t=random_string(10000) WHERE id=1`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` UPDATE %s SET c1=c1+4 WHERE id=1`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - e2e.EnvNoError(s.t, env, insertTx.Commit(context.Background())) + e2e.EnvNoError(s.t, env, err) + e2e.EnvNoError(s.t, env, insertTx.Commit(context.Background())) - e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize transaction", func() bool { - return s.comparePGTables(srcTableName, - dstTableName+` WHERE NOT "_PEERDB_IS_DELETED"`, "id,c1,c2,t") == nil - }) - - env.CancelWorkflow() - }() + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize transaction", func() bool { + return s.comparePGTables(srcTableName, + dstTableName+` WHERE NOT "_PEERDB_IS_DELETED"`, "id,c1,c2,t") == nil + }) - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, config, nil) + env.Cancel() e2e.RequireEnvCanceled(s.t, env) // verify our updates and delete happened @@ -961,7 +929,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_UD_Same_Batch() { } func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_Insert_After_Delete() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) srcTableName := s.attachSchemaSuffix("test_softdel_iad") dstTableName := s.attachSchemaSuffix("test_softdel_iad_dst") @@ -997,34 +965,31 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_Insert_After_Delete() { MaxBatchSize: 100, } - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and then insert and delete rows in the table. 
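Every soft-delete variant here shares one verification idiom: live rows are compared through a WHERE NOT "_PEERDB_IS_DELETED" filter appended to the destination table name, and the tombstones themselves are counted separately. Condensed from the surrounding tests:

	e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize delete", func() bool {
		return s.comparePGTables(srcTableName,
			dstTableName+` WHERE NOT "_PEERDB_IS_DELETED"`, "id,c1,c2,t") == nil
	})
	softDeleteQuery := fmt.Sprintf(
		`SELECT COUNT(*) FROM %s WHERE "_PEERDB_IS_DELETED"`, dstTableName)
	e2e.EnvWaitFor(s.t, env, time.Minute, "count soft-deleted rows", func() bool {
		numRows, err := s.RunInt64Query(softDeleteQuery)
		return err == nil && numRows == 1
	})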
- go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, config, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize row", func() bool { - return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil - }) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize row", func() bool { + return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil + }) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize delete", func() bool { - return s.comparePGTables(srcTableName, dstTableName+` WHERE NOT "_PEERDB_IS_DELETED"`, "id,c1,c2,t") == nil - }) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize delete", func() bool { + return s.comparePGTables(srcTableName, dstTableName+` WHERE NOT "_PEERDB_IS_DELETED"`, "id,c1,c2,t") == nil + }) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(id,c1,c2,t) VALUES (1,3,4,random_string(10000))`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize reinsert", func() bool { - return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil - }) - - env.CancelWorkflow() - }() + e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize reinsert", func() bool { + return s.comparePGTables(srcTableName, dstTableName, "id,c1,c2,t") == nil + }) - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, config, nil) + env.Cancel() e2e.RequireEnvCanceled(s.t, env) softDeleteQuery := fmt.Sprintf(` @@ -1036,7 +1001,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_Insert_After_Delete() { } func (s PeerFlowE2ETestSuitePG) Test_Supported_Mixed_Case_Table() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) stmtSrcTableName := fmt.Sprintf(`e2e_test_%s."%s"`, s.suffix, "testMixedCase") srcTableName := s.attachSchemaSuffix("testMixedCase") @@ -1071,48 +1036,37 @@ func (s PeerFlowE2ETestSuitePG) Test_Supported_Mixed_Case_Table() { MaxBatchSize: 100, } - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and then insert and delete rows in the table. 
-	go func() {
-		e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen)
-		// insert 20 rows into the source table
-		for i := range 10 {
-			testKey := fmt.Sprintf("test_key_%d", i)
-			testValue := fmt.Sprintf("test_value_%d", i)
-			_, err = s.Conn().Exec(context.Background(), fmt.Sprintf(`
+	env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, config, nil)
+	e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen)
+	// insert 10 rows into the source table
+	for i := range 10 {
+		testKey := fmt.Sprintf("test_key_%d", i)
+		testValue := fmt.Sprintf("test_value_%d", i)
+		_, err = s.Conn().Exec(context.Background(), fmt.Sprintf(`
 		INSERT INTO %s ("highGold","eVe") VALUES ($1, $2)
 		`, stmtSrcTableName), testKey, testValue)
-			e2e.EnvNoError(s.t, env, err)
-		}
-		s.t.Log("Inserted 20 rows into the source table")
-
-		e2e.EnvWaitFor(s.t, env, 1*time.Minute, "normalize mixed case", func() bool {
-			return s.comparePGTables(stmtSrcTableName, stmtDstTableName,
-				"id,\"pulseArmor\",\"highGold\",\"eVe\"") == nil
-		})
+		e2e.EnvNoError(s.t, env, err)
+	}
+	s.t.Log("Inserted 10 rows into the source table")
-		env.CancelWorkflow()
-	}()
+	e2e.EnvWaitFor(s.t, env, 1*time.Minute, "normalize mixed case", func() bool {
+		return s.comparePGTables(stmtSrcTableName, stmtDstTableName,
+			"id,\"pulseArmor\",\"highGold\",\"eVe\"") == nil
+	})
-	env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, config, nil)
+	env.Cancel()
+	e2e.RequireEnvCanceled(s.t, env)
 }
 
 // test doesn't work, make it work later
 func (s PeerFlowE2ETestSuitePG) Test_Dynamic_Mirror_Config_Via_Signals() {
-	env := e2e.NewTemporalTestWorkflowEnvironment(s.t)
-	// needed otherwise errors out
-	workerOptions := worker.Options{
-		EnableSessionWorker: true,
-	}
-	env.SetWorkerOptions(workerOptions)
-
 	srcTable1Name := s.attachSchemaSuffix("test_dynconfig_1")
 	srcTable2Name := s.attachSchemaSuffix("test_dynconfig_2")
 	dstTable1Name := s.attachSchemaSuffix("test_dynconfig_1_dst")
 	dstTable2Name := s.attachSchemaSuffix("test_dynconfig_2_dst")
-	sentPause := false
-	sentUpdate := false
 
 	_, err := s.Conn().Exec(context.Background(), fmt.Sprintf(`
 	CREATE TABLE IF NOT EXISTS %s (
 		id INT PRIMARY KEY GENERATED ALWAYS AS IDENTITY,
 		t TEXT DEFAULT ''
 	);
 	CREATE TABLE IF NOT EXISTS %s (
 		id INT PRIMARY KEY GENERATED ALWAYS AS IDENTITY,
 		t TEXT DEFAULT ''
 	);
 	`, srcTable1Name, srcTable2Name))
 	require.NoError(s.t, err)
@@ -1147,6 +1101,9 @@ func (s PeerFlowE2ETestSuitePG) Test_Dynamic_Mirror_Config_Via_Signals() {
 		SnapshotNumTablesInParallel: 1,
 	}
 
+	tc := e2e.NewTemporalClient(s.t)
+	env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, config, nil)
+
 	addRows := func(numRows int) {
 		for range numRows {
 			_, err = s.Conn().Exec(context.Background(),
@@ -1161,7 +1118,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Dynamic_Mirror_Config_Via_Signals() {
 	getWorkflowState := func() peerflow.CDCFlowWorkflowState {
 		var state peerflow.CDCFlowWorkflowState
-		val, err := env.QueryWorkflow(shared.CDCFlowStateQuery)
+		val, err := env.Query(shared.CDCFlowStateQuery)
 		e2e.EnvNoError(s.t, env, err)
 		err = val.Get(&state)
 		e2e.EnvNoError(s.t, env, err)
@@ -1171,7 +1128,7 @@
 	getFlowStatus := func() protos.FlowStatus {
 		var flowStatus protos.FlowStatus
-		val, err := env.QueryWorkflow(shared.FlowStatusQuery)
+		val, err := env.Query(shared.FlowStatusQuery)
 		e2e.EnvNoError(s.t, env, err)
 		err = val.Get(&flowStatus)
 		e2e.EnvNoError(s.t, env, err)
@@ -1179,101 +1136,60 @@
 		return flowStatus
 	}
 
-	var workflowState peerflow.CDCFlowWorkflowState
-
-	// signals in tests are weird, you need to register them before starting the workflow
-	// otherwise you guessed it, errors out. really don't like this.
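Against a live run, the old env.QueryWorkflow calls become env.Query on the e2e.WorkflowRun handle; the result is still an encoded value that has to be decoded into the target type. Both helpers above reduce to this pattern:

	var flowStatus protos.FlowStatus
	val, err := env.Query(shared.FlowStatusQuery)
	e2e.EnvNoError(s.t, env, err)
	e2e.EnvNoError(s.t, env, val.Get(&flowStatus))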
- // too short of a gap between signals also causes issues - // might have something to do with how test workflows handle fast-forwarding time. - env.RegisterDelayedCallback(func() { - workflowState = getWorkflowState() - e2e.EnvSignalWorkflow(env, model.FlowSignal, model.PauseSignal) - s.t.Log("Sent pause signal") - sentPause = true - }, 28*time.Second) - // add before to test initial load too. addRows(18) - go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - // insert 18 rows into the source tables, exactly 3 batches - addRows(18) - - e2e.EnvWaitFor(s.t, env, 1*time.Minute, "normalize 18 records - first table", func() bool { - return s.comparePGTables(srcTable1Name, dstTable1Name, "id,t") == nil - }) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + // insert 18 rows into the source tables, exactly 3 batches + addRows(18) - workflowState = getWorkflowState() - assert.EqualValues(s.t, 7, workflowState.SyncFlowOptions.IdleTimeoutSeconds) - assert.EqualValues(s.t, 6, workflowState.SyncFlowOptions.BatchSize) - assert.Len(s.t, workflowState.SyncFlowOptions.TableMappings, 1) - assert.Len(s.t, workflowState.SyncFlowOptions.SrcTableIdNameMapping, 1) - assert.Len(s.t, workflowState.SyncFlowOptions.TableNameSchemaMapping, 1) - - if !s.t.Failed() { - // wait for first RegisterDelayedCallback to hit. - e2e.EnvWaitFor(s.t, env, 1*time.Minute, "sent pause signal", func() bool { - return sentPause - }) - } else { - env.CancelWorkflow() - } - }() + e2e.EnvWaitFor(s.t, env, 1*time.Minute, "normalize 18 records - first table", func() bool { + return s.comparePGTables(srcTable1Name, dstTable1Name, "id,t") == nil + }) - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, config, nil) - err = env.GetWorkflowError() - if !workflow.IsContinueAsNewError(err) { - require.NoError(s.t, err) - require.Error(s.t, err) - } - workflowState.ActiveSignal = model.PauseSignal - env = e2e.NewTemporalTestWorkflowEnvironment(s.t) + workflowState := getWorkflowState() + assert.EqualValues(s.t, 7, workflowState.SyncFlowOptions.IdleTimeoutSeconds) + assert.EqualValues(s.t, 6, workflowState.SyncFlowOptions.BatchSize) + assert.Len(s.t, workflowState.SyncFlowOptions.TableMappings, 1) + assert.Len(s.t, workflowState.SyncFlowOptions.SrcTableIdNameMapping, 1) + assert.Len(s.t, workflowState.SyncFlowOptions.TableNameSchemaMapping, 1) + + if !s.t.Failed() { + addRows(1) + e2e.SignalWorkflow(env, model.FlowSignal, model.PauseSignal) + addRows(1) + e2e.EnvWaitFor(s.t, env, 1*time.Minute, "paused workflow", func() bool { + // keep adding 1 more row - finishing another sync + addRows(1) - // this signal being sent also unblocks another WaitFor - env.RegisterDelayedCallback(func() { - e2e.EnvWaitFor(s.t, env, 1*time.Minute, "send update signal after pause confirmed", func() bool { flowStatus := getFlowStatus() - if flowStatus != protos.FlowStatus_STATUS_PAUSED { - return false - } - e2e.EnvSignalWorkflow(env, model.CDCDynamicPropertiesSignal, &protos.CDCFlowConfigUpdate{ - IdleTimeout: 14, - BatchSize: 12, - AdditionalTables: []*protos.TableMapping{ - { - SourceTableIdentifier: srcTable2Name, - DestinationTableIdentifier: dstTable2Name, - }, - }, - }) - s.t.Log("Sent update signal") - sentUpdate = true - return true + return flowStatus == protos.FlowStatus_STATUS_PAUSED }) - }, 56*time.Second) - go func() { - // we have a paused mirror, wait for second signal to hit. 
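The replacement interleaved below drops the delayed-callback choreography that the removed comments complain about: with a real server, the test simply signals the running workflow and polls the flow status between steps. The whole pause, reconfigure, resume sequence, condensed (identifiers as used in this patch):

	e2e.SignalWorkflow(env, model.FlowSignal, model.PauseSignal)
	e2e.EnvWaitFor(s.t, env, time.Minute, "paused workflow", func() bool {
		return getFlowStatus() == protos.FlowStatus_STATUS_PAUSED
	})
	e2e.SignalWorkflow(env, model.CDCDynamicPropertiesSignal, &protos.CDCFlowConfigUpdate{
		IdleTimeout: 14,
		BatchSize:   12,
		AdditionalTables: []*protos.TableMapping{{
			SourceTableIdentifier:      srcTable2Name,
			DestinationTableIdentifier: dstTable2Name,
		}},
	})
	e2e.SignalWorkflow(env, model.FlowSignal, model.NoopSignal) // resume
	e2e.EnvWaitFor(s.t, env, time.Minute, "resumed workflow", func() bool {
		return getFlowStatus() == protos.FlowStatus_STATUS_RUNNING
	})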
- e2e.EnvWaitFor(s.t, env, 1*time.Minute, "sent updates signal", func() bool { - return sentUpdate + e2e.SignalWorkflow(env, model.CDCDynamicPropertiesSignal, &protos.CDCFlowConfigUpdate{ + IdleTimeout: 14, + BatchSize: 12, + AdditionalTables: []*protos.TableMapping{ + { + SourceTableIdentifier: srcTable2Name, + DestinationTableIdentifier: dstTable2Name, + }, + }, }) // add rows to both tables before resuming - should handle addRows(18) + e2e.SignalWorkflow(env, model.FlowSignal, model.NoopSignal) + e2e.EnvWaitFor(s.t, env, 1*time.Minute, "resumed workflow", func() bool { return getFlowStatus() == protos.FlowStatus_STATUS_RUNNING }) e2e.EnvWaitFor(s.t, env, 1*time.Minute, "normalize 18 records - first table", func() bool { return s.comparePGTables(srcTable1Name, dstTable1Name, "id,t") == nil }) - /* TODO fix in integration tests e2e.EnvWaitFor(s.t, env, 2*time.Minute, "initial load + normalize 18 records - second table", func() bool { - err := s.comparePGTables(srcTable2Name, dstTable2Name, "id,t") - s.t.Log("TEST", err) - return err == nil + return s.comparePGTables(srcTable2Name, dstTable2Name, "id,t") == nil }) - */ workflowState = getWorkflowState() assert.EqualValues(s.t, 14, workflowState.SyncFlowOptions.IdleTimeoutSeconds) @@ -1281,9 +1197,8 @@ func (s PeerFlowE2ETestSuitePG) Test_Dynamic_Mirror_Config_Via_Signals() { assert.Len(s.t, workflowState.SyncFlowOptions.TableMappings, 2) assert.Len(s.t, workflowState.SyncFlowOptions.SrcTableIdNameMapping, 2) assert.Len(s.t, workflowState.SyncFlowOptions.TableNameSchemaMapping, 2) - env.CancelWorkflow() - }() + } - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, config, &workflowState) + env.Cancel() e2e.RequireEnvCanceled(s.t, env) } diff --git a/flow/e2e/postgres/qrep_flow_pg_test.go b/flow/e2e/postgres/qrep_flow_pg_test.go index e4605ce592..a7a3672119 100644 --- a/flow/e2e/postgres/qrep_flow_pg_test.go +++ b/flow/e2e/postgres/qrep_flow_pg_test.go @@ -215,8 +215,6 @@ func (s PeerFlowE2ETestSuitePG) TestSimpleSlotCreation() { } func (s PeerFlowE2ETestSuitePG) Test_Complete_QRep_Flow_Multi_Insert_PG() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) - numRows := 10 srcTable := "test_qrep_flow_avro_pg_1" @@ -247,21 +245,16 @@ func (s PeerFlowE2ETestSuitePG) Test_Complete_QRep_Flow_Multi_Insert_PG() { ) require.NoError(s.t, err) - e2e.RunQrepFlowWorkflow(env, qrepConfig) - - // Verify workflow completes without error - require.True(s.t, env.IsWorkflowCompleted()) - - err = env.GetWorkflowError() - require.NoError(s.t, err) + tc := e2e.NewTemporalClient(s.t) + env := e2e.RunQrepFlowWorkflow(tc, qrepConfig) + e2e.EnvWaitForFinished(s.t, env, 3*time.Minute) + require.NoError(s.t, env.Error()) err = s.comparePGTables(srcSchemaQualified, dstSchemaQualified, "*") require.NoError(s.t, err) } func (s PeerFlowE2ETestSuitePG) Test_PeerDB_Columns_QRep_PG() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) - numRows := 10 srcTable := "test_qrep_columns_pg_1" @@ -289,21 +282,16 @@ func (s PeerFlowE2ETestSuitePG) Test_PeerDB_Columns_QRep_PG() { ) require.NoError(s.t, err) - e2e.RunQrepFlowWorkflow(env, qrepConfig) - - // Verify workflow completes without error - require.True(s.t, env.IsWorkflowCompleted()) - - err = env.GetWorkflowError() - require.NoError(s.t, err) + tc := e2e.NewTemporalClient(s.t) + env := e2e.RunQrepFlowWorkflow(tc, qrepConfig) + e2e.EnvWaitForFinished(s.t, env, 3*time.Minute) + require.NoError(s.t, env.Error()) err = s.checkSyncedAt(dstSchemaQualified) require.NoError(s.t, err) } func (s PeerFlowE2ETestSuitePG) 
Test_No_Rows_QRep_PG() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) - numRows := 0 srcTable := "test_no_rows_qrep_pg_1" @@ -331,11 +319,8 @@ func (s PeerFlowE2ETestSuitePG) Test_No_Rows_QRep_PG() { ) require.NoError(s.t, err) - e2e.RunQrepFlowWorkflow(env, qrepConfig) - - // Verify workflow completes without error - require.True(s.t, env.IsWorkflowCompleted()) - - err = env.GetWorkflowError() - require.NoError(s.t, err) + tc := e2e.NewTemporalClient(s.t) + env := e2e.RunQrepFlowWorkflow(tc, qrepConfig) + e2e.EnvWaitForFinished(s.t, env, 3*time.Minute) + require.NoError(s.t, env.Error()) } diff --git a/flow/e2e/s3/cdc_s3_test.go b/flow/e2e/s3/cdc_s3_test.go index 8a976b8a0e..9f49195b3a 100644 --- a/flow/e2e/s3/cdc_s3_test.go +++ b/flow/e2e/s3/cdc_s3_test.go @@ -20,7 +20,7 @@ func (s PeerFlowE2ETestSuiteS3) attachSuffix(input string) string { } func (s PeerFlowE2ETestSuiteS3) Test_Complete_Simple_Flow_S3() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) srcTableName := s.attachSchemaSuffix("test_simple_flow_s3") dstTableName := fmt.Sprintf("%s.%s", "peerdb_test_s3", "test_simple_flow_s3") @@ -42,31 +42,29 @@ func (s PeerFlowE2ETestSuiteS3) Test_Complete_Simple_Flow_S3() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 5 - go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - // insert 20 rows - for i := 1; i <= 20; i++ { - testKey := fmt.Sprintf("test_key_%d", i) - testValue := fmt.Sprintf("test_value_%d", i) - _, err = s.conn.Conn().Exec(context.Background(), fmt.Sprintf(` + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + // insert 20 rows + for i := 1; i <= 20; i++ { + testKey := fmt.Sprintf("test_key_%d", i) + testValue := fmt.Sprintf("test_value_%d", i) + _, err = s.conn.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s (key, value) VALUES ($1, $2) `, srcTableName), testKey, testValue) - e2e.EnvNoError(s.t, env, err) - } e2e.EnvNoError(s.t, env, err) + } + e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, 2*time.Minute, "waiting for blobs", func() bool { - ctx, cancel := context.WithTimeout(context.Background(), 25*time.Second) - defer cancel() - files, err := s.s3Helper.ListAllFiles(ctx, flowJobName) - s.t.Logf("Files in Test_Complete_Simple_Flow_S3 %s: %d", flowJobName, len(files)) - e2e.EnvNoError(s.t, env, err) - return len(files) == 4 - }) + e2e.EnvWaitFor(s.t, env, 2*time.Minute, "waiting for blobs", func() bool { + ctx, cancel := context.WithTimeout(context.Background(), 25*time.Second) + defer cancel() + files, err := s.s3Helper.ListAllFiles(ctx, flowJobName) + s.t.Logf("Files in Test_Complete_Simple_Flow_S3 %s: %d", flowJobName, len(files)) + e2e.EnvNoError(s.t, env, err) + return len(files) == 4 + }) - env.CancelWorkflow() - }() + env.Cancel() - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) e2e.RequireEnvCanceled(s.t, env) } diff --git a/flow/e2e/s3/qrep_flow_s3_test.go b/flow/e2e/s3/qrep_flow_s3_test.go index 54a66f0ed0..240f3a78b9 100644 --- a/flow/e2e/s3/qrep_flow_s3_test.go +++ b/flow/e2e/s3/qrep_flow_s3_test.go @@ -104,7 +104,7 @@ func (s PeerFlowE2ETestSuiteS3) Test_Complete_QRep_Flow_S3() { s.t.Skip("Skipping S3 test") } - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) jobName := "test_complete_flow_s3" schemaQualifiedName := fmt.Sprintf("e2e_test_%s.%s", s.suffix, jobName) @@ -125,13 +125,9 @@ 
func (s PeerFlowE2ETestSuiteS3) Test_Complete_QRep_Flow_S3() { require.NoError(s.t, err) qrepConfig.StagingPath = s.s3Helper.s3Config.Url - e2e.RunQrepFlowWorkflow(env, qrepConfig) - - // Verify workflow completes without error - require.True(s.t, env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - require.NoError(s.t, err) + env := e2e.RunQrepFlowWorkflow(tc, qrepConfig) + e2e.EnvWaitForFinished(s.t, env, 3*time.Minute) + require.NoError(s.t, env.Error()) // Verify destination has 1 file // make context with timeout @@ -150,7 +146,7 @@ func (s PeerFlowE2ETestSuiteS3) Test_Complete_QRep_Flow_S3_CTID() { s.t.Skip("Skipping S3 test") } - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) jobName := "test_complete_flow_s3_ctid" schemaQualifiedName := fmt.Sprintf("e2e_test_%s.%s", s.suffix, jobName) @@ -173,13 +169,9 @@ func (s PeerFlowE2ETestSuiteS3) Test_Complete_QRep_Flow_S3_CTID() { qrepConfig.InitialCopyOnly = true qrepConfig.WatermarkColumn = "ctid" - e2e.RunQrepFlowWorkflow(env, qrepConfig) - - // Verify workflow completes without error - require.True(s.t, env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - require.NoError(s.t, err) + env := e2e.RunQrepFlowWorkflow(tc, qrepConfig) + e2e.EnvWaitForFinished(s.t, env, 3*time.Minute) + require.NoError(s.t, env.Error()) // Verify destination has 1 file // make context with timeout diff --git a/flow/e2e/snowflake/peer_flow_sf_test.go b/flow/e2e/snowflake/peer_flow_sf_test.go index 40dbefaa2e..9eaf491e47 100644 --- a/flow/e2e/snowflake/peer_flow_sf_test.go +++ b/flow/e2e/snowflake/peer_flow_sf_test.go @@ -127,7 +127,7 @@ func SetupSuite(t *testing.T) PeerFlowE2ETestSuiteSF { } func (s PeerFlowE2ETestSuiteSF) Test_Complete_Simple_Flow_SF() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) tableName := "test_simple_flow_sf" srcTableName := s.attachSchemaSuffix(tableName) @@ -151,26 +151,24 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_Simple_Flow_SF() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + // wait for PeerFlowStatusQuery to finish setup // and then insert 20 rows into the source table - go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - // insert 20 rows into the source table - for i := range 20 { - testKey := fmt.Sprintf("test_key_%d", i) - testValue := fmt.Sprintf("test_value_%d", i) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` - INSERT INTO %s (key, value) VALUES ($1, $2) - `, srcTableName), testKey, testValue) - e2e.EnvNoError(s.t, env, err) - } - s.t.Log("Inserted 20 rows into the source table") - e2e.EnvWaitForEqualTables(env, s, "normalize table", tableName, "id,key,value") + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + // insert 20 rows into the source table + for i := range 20 { + testKey := fmt.Sprintf("test_key_%d", i) + testValue := fmt.Sprintf("test_value_%d", i) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + INSERT INTO %s (key, value) VALUES ($1, $2) + `, srcTableName), testKey, testValue) + e2e.EnvNoError(s.t, env, err) + } + s.t.Log("Inserted 20 rows into the source table") + e2e.EnvWaitForEqualTables(env, s, "normalize table", tableName, "id,key,value") - env.CancelWorkflow() - }() + env.Cancel() - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, 
nil) e2e.RequireEnvCanceled(s.t, env) // check the number of rows where _PEERDB_SYNCED_AT is newer than 5 mins ago @@ -184,7 +182,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_Simple_Flow_SF() { } func (s PeerFlowE2ETestSuiteSF) Test_Flow_ReplicaIdentity_Index_No_Pkey() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) tableName := "test_replica_identity_no_pkey" srcTableName := s.attachSchemaSuffix(tableName) @@ -211,34 +209,32 @@ func (s PeerFlowE2ETestSuiteSF) Test_Flow_ReplicaIdentity_Index_No_Pkey() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and then insert 20 rows into the source table - go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - // insert 20 rows into the source table - for i := range 20 { - testKey := fmt.Sprintf("test_key_%d", i) - testValue := fmt.Sprintf("test_value_%d", i) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` - INSERT INTO %s (id, key, value) VALUES ($1, $2, $3) - `, srcTableName), i, testKey, testValue) - e2e.EnvNoError(s.t, env, err) - } - s.t.Log("Inserted 20 rows into the source table") + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + // insert 20 rows into the source table + for i := range 20 { + testKey := fmt.Sprintf("test_key_%d", i) + testValue := fmt.Sprintf("test_value_%d", i) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + INSERT INTO %s (id, key, value) VALUES ($1, $2, $3) + `, srcTableName), i, testKey, testValue) + e2e.EnvNoError(s.t, env, err) + } + s.t.Log("Inserted 20 rows into the source table") - e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize insert", func() bool { - count, err := s.sfHelper.CountRows("test_replica_identity_no_pkey") - return err == nil && count == 20 - }) - env.CancelWorkflow() - }() + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize insert", func() bool { + count, err := s.sfHelper.CountRows("test_replica_identity_no_pkey") + return err == nil && count == 20 + }) + env.Cancel() - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuiteSF) Test_Invalid_Geo_SF_Avro_CDC() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) tableName := "test_invalid_geo_sf_avro_cdc" srcTableName := s.attachSchemaSuffix(tableName) @@ -262,63 +258,61 @@ func (s PeerFlowE2ETestSuiteSF) Test_Invalid_Geo_SF_Avro_CDC() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and then insert 10 rows into the source table - go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - // insert 4 invalid shapes and 6 valid shapes into the source table - for range 4 { - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + // insert 4 invalid shapes and 6 valid shapes into the source table + for range 4 { + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s (line,poly) VALUES ($1,$2) `, srcTableName), 
"010200000001000000000000000000F03F0000000000000040", - "0103000020e6100000010000000c0000001a8361d35dc64140afdb8d2b1bc3c9bf1b8ed4685fc641405ba64c"+ - "579dc2c9bf6a6ad95a5fc64140cd82767449c2c9bf9570fbf85ec641408a07944db9c2c9bf729a18a55ec6414021b8b748c7c2c9bfba46de4c"+ - "5fc64140f2567052abc2c9bf2df9c5925fc641409394e16573c2c9bf2df9c5925fc6414049eceda9afc1c9bfdd1cc1a05fc64140fe43faedebc0"+ - "c9bf4694f6065fc64140fe43faedebc0c9bfffe7305f5ec641406693d6f2ddc0c9bf1a8361d35dc64140afdb8d2b1bc3c9bf", - ) - e2e.EnvNoError(s.t, env, err) - } - s.t.Log("Inserted 4 invalid geography rows into the source table") - for range 6 { - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + "0103000020e6100000010000000c0000001a8361d35dc64140afdb8d2b1bc3c9bf1b8ed4685fc641405ba64c"+ + "579dc2c9bf6a6ad95a5fc64140cd82767449c2c9bf9570fbf85ec641408a07944db9c2c9bf729a18a55ec6414021b8b748c7c2c9bfba46de4c"+ + "5fc64140f2567052abc2c9bf2df9c5925fc641409394e16573c2c9bf2df9c5925fc6414049eceda9afc1c9bfdd1cc1a05fc64140fe43faedebc0"+ + "c9bf4694f6065fc64140fe43faedebc0c9bfffe7305f5ec641406693d6f2ddc0c9bf1a8361d35dc64140afdb8d2b1bc3c9bf", + ) + e2e.EnvNoError(s.t, env, err) + } + s.t.Log("Inserted 4 invalid geography rows into the source table") + for range 6 { + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s (line,poly) VALUES ($1,$2) `, srcTableName), "010200000002000000000000000000F03F000000000000004000000000000008400000000000001040", - "010300000001000000050000000000000000000000000000000000000000000000"+ - "00000000000000000000f03f000000000000f03f000000000000f03f0000000000"+ - "00f03f000000000000000000000000000000000000000000000000") - e2e.EnvNoError(s.t, env, err) - } - s.t.Log("Inserted 6 valid geography rows and 10 total rows into source") + "010300000001000000050000000000000000000000000000000000000000000000"+ + "00000000000000000000f03f000000000000f03f000000000000f03f0000000000"+ + "00f03f000000000000000000000000000000000000000000000000") + e2e.EnvNoError(s.t, env, err) + } + s.t.Log("Inserted 6 valid geography rows and 10 total rows into source") - e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize shapes", func() bool { - // We inserted 4 invalid shapes in each, - // which should be filtered out as null on destination. - lineCount, err := s.sfHelper.CountNonNullRows("test_invalid_geo_sf_avro_cdc", "line") - if err != nil { - return false - } + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize shapes", func() bool { + // We inserted 4 invalid shapes in each, + // which should be filtered out as null on destination. 
+ lineCount, err := s.sfHelper.CountNonNullRows("test_invalid_geo_sf_avro_cdc", "line") + if err != nil { + return false + } - polyCount, err := s.sfHelper.CountNonNullRows("test_invalid_geo_sf_avro_cdc", "poly") - if err != nil { - return false - } + polyCount, err := s.sfHelper.CountNonNullRows("test_invalid_geo_sf_avro_cdc", "poly") + if err != nil { + return false + } - if lineCount != 6 || polyCount != 6 { - s.t.Logf("wrong counts, expect 6 lines 6 polies, not %d lines %d polies", lineCount, polyCount) - return false - } else { - return true - } - }) - env.CancelWorkflow() - }() + if lineCount != 6 || polyCount != 6 { + s.t.Logf("wrong counts, expect 6 lines 6 polys, not %d lines %d polys", lineCount, polyCount) + return false + } else { + return true + } + }) + env.Cancel() - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuiteSF) Test_Toast_SF() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) srcTableName := s.attachSchemaSuffix("test_toast_sf_1") dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_toast_sf_1") @@ -342,36 +336,34 @@ func (s PeerFlowE2ETestSuiteSF) Test_Toast_SF() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and execute a transaction touching toast columns - go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - /* - Executing a transaction which - 1. changes both toast column - 2. changes no toast column - 2. changes 1 toast column - */ - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` - BEGIN; - INSERT INTO %s (t1,t2,k) SELECT random_string(9000),random_string(9000), - 1 FROM generate_series(1,2); - UPDATE %s SET k=102 WHERE id=1; - UPDATE %s SET t1='dummy' WHERE id=2; - END; - `, srcTableName, srcTableName, srcTableName)) - e2e.EnvNoError(s.t, env, err) - s.t.Log("Executed a transaction touching toast columns") - e2e.EnvWaitForEqualTables(env, s, "normalizing tx", "test_toast_sf_1", `id,t1,t2,k`) - env.CancelWorkflow() - }() + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + /* + Executing a transaction which + 1. changes both toast columns + 2. changes no toast column + 3. 
changes 1 toast column + */ + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + BEGIN; + INSERT INTO %s (t1,t2,k) SELECT random_string(9000),random_string(9000), + 1 FROM generate_series(1,2); + UPDATE %s SET k=102 WHERE id=1; + UPDATE %s SET t1='dummy' WHERE id=2; + END; + `, srcTableName, srcTableName, srcTableName)) + e2e.EnvNoError(s.t, env, err) + s.t.Log("Executed a transaction touching toast columns") + e2e.EnvWaitForEqualTables(env, s, "normalizing tx", "test_toast_sf_1", `id,t1,t2,k`) + env.Cancel() - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuiteSF) Test_Toast_Advance_1_SF() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) srcTableName := s.attachSchemaSuffix("test_toast_sf_3") dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_toast_sf_3") @@ -395,42 +387,40 @@ func (s PeerFlowE2ETestSuiteSF) Test_Toast_Advance_1_SF() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and execute a transaction touching toast columns - go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - // complex transaction with random DMLs on a table with toast columns - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` - BEGIN; - INSERT INTO %s (t1,t2,k) SELECT random_string(9000),random_string(9000), - 1 FROM generate_series(1,2); - UPDATE %s SET k=102 WHERE id=1; - UPDATE %s SET t1='dummy' WHERE id=2; - UPDATE %s SET t2='dummy' WHERE id=2; - DELETE FROM %s WHERE id=1; - INSERT INTO %s(t1,t2,k) SELECT random_string(9000),random_string(9000), - 1 FROM generate_series(1,2); - UPDATE %s SET k=1 WHERE id=1; - UPDATE %s SET t1='dummy1',t2='dummy2' WHERE id=1; - UPDATE %s SET t1='dummy3' WHERE id=3; - DELETE FROM %s WHERE id=2; - DELETE FROM %s WHERE id=3; - DELETE FROM %s WHERE id=2; - END; - `, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, - srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName)) - e2e.EnvNoError(s.t, env, err) - s.t.Log("Executed a transaction touching toast columns") - e2e.EnvWaitForEqualTables(env, s, "normalizing tx", "test_toast_sf_3", `id,t1,t2,k`) - env.CancelWorkflow() - }() + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + // complex transaction with random DMLs on a table with toast columns + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + BEGIN; + INSERT INTO %s (t1,t2,k) SELECT random_string(9000),random_string(9000), + 1 FROM generate_series(1,2); + UPDATE %s SET k=102 WHERE id=1; + UPDATE %s SET t1='dummy' WHERE id=2; + UPDATE %s SET t2='dummy' WHERE id=2; + DELETE FROM %s WHERE id=1; + INSERT INTO %s(t1,t2,k) SELECT random_string(9000),random_string(9000), + 1 FROM generate_series(1,2); + UPDATE %s SET k=1 WHERE id=1; + UPDATE %s SET t1='dummy1',t2='dummy2' WHERE id=1; + UPDATE %s SET t1='dummy3' WHERE id=3; + DELETE FROM %s WHERE id=2; + DELETE FROM %s WHERE id=3; + DELETE FROM %s WHERE id=2; + END; + `, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, + srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName)) + e2e.EnvNoError(s.t, env, err) + s.t.Log("Executed a transaction touching toast columns") + 
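
The mechanical change repeated in every test in this series is the same: the old `NewTemporalTestWorkflowEnvironment` ran the workflow in-process, and its `env.ExecuteWorkflow` blocked until completion, so setup, inserts, and `env.CancelWorkflow()` had to run from a separate goroutine. `NewTemporalClient` plus `e2e.ExecutePeerflow` instead start the workflow on a real Temporal server and return a handle immediately, which is why the `go func() { ... }()` wrappers can be deleted. A hedged sketch of what the new helpers could look like on the Temporal Go SDK — only the names the tests use (`ExecutePeerflow`, `Cancel`, `Error`) come from the diff; the signatures and bodies here, including the task-queue name and the threaded-through *testing.T, are assumptions:

    package e2e

    import (
    	"context"
    	"testing"

    	"go.temporal.io/sdk/client"
    )

    // WorkflowRun is a minimal stand-in for the handle these tests use;
    // the real e2e package presumably carries more state.
    type WorkflowRun struct {
    	t   *testing.T
    	c   client.Client
    	run client.WorkflowRun
    }

    // ExecutePeerflow starts the workflow on a real Temporal server and
    // returns a handle right away, unlike the old test environment's
    // blocking ExecuteWorkflow.
    func ExecutePeerflow(t *testing.T, tc client.Client, wf interface{}, args ...interface{}) *WorkflowRun {
    	run, err := tc.ExecuteWorkflow(context.Background(), client.StartWorkflowOptions{
    		TaskQueue: "peer-flow", // assumed task queue name
    	}, wf, args...)
    	if err != nil {
    		t.Fatal(err)
    	}
    	return &WorkflowRun{t: t, c: tc, run: run}
    }

    // Cancel mirrors env.Cancel() above: request cancellation by workflow ID.
    func (w *WorkflowRun) Cancel() {
    	if err := w.c.CancelWorkflow(context.Background(), w.run.GetID(), w.run.GetRunID()); err != nil {
    		w.t.Log(err)
    	}
    }

    // Error mirrors env.Error(): block until the workflow completes and
    // return its failure, if any.
    func (w *WorkflowRun) Error() error {
    	return w.run.Get(context.Background(), nil)
    }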
e2e.EnvWaitForEqualTables(env, s, "normalizing tx", "test_toast_sf_3", `id,t1,t2,k`) + env.Cancel() - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuiteSF) Test_Toast_Advance_2_SF() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) srcTableName := s.attachSchemaSuffix("test_toast_sf_4") dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_toast_sf_4") @@ -453,36 +443,34 @@ func (s PeerFlowE2ETestSuiteSF) Test_Toast_Advance_2_SF() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and execute a transaction touching toast columns - go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - // complex transaction with random DMLs on a table with toast columns - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` - BEGIN; - INSERT INTO %s (t1,k) SELECT random_string(9000), - 1 FROM generate_series(1,1); - UPDATE %s SET t1=sub.t1 FROM (SELECT random_string(9000) t1 - FROM generate_series(1,1) ) sub WHERE id=1; - UPDATE %s SET k=2 WHERE id=1; - UPDATE %s SET k=3 WHERE id=1; - UPDATE %s SET t1=sub.t1 FROM (SELECT random_string(9000) t1 - FROM generate_series(1,1)) sub WHERE id=1; - UPDATE %s SET k=4 WHERE id=1; - END; - `, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName)) - e2e.EnvNoError(s.t, env, err) - s.t.Log("Executed a transaction touching toast columns") - e2e.EnvWaitForEqualTables(env, s, "normalizing tx", "test_toast_sf_4", `id,t1,k`) - env.CancelWorkflow() - }() + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + // complex transaction with random DMLs on a table with toast columns + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + BEGIN; + INSERT INTO %s (t1,k) SELECT random_string(9000), + 1 FROM generate_series(1,1); + UPDATE %s SET t1=sub.t1 FROM (SELECT random_string(9000) t1 + FROM generate_series(1,1) ) sub WHERE id=1; + UPDATE %s SET k=2 WHERE id=1; + UPDATE %s SET k=3 WHERE id=1; + UPDATE %s SET t1=sub.t1 FROM (SELECT random_string(9000) t1 + FROM generate_series(1,1)) sub WHERE id=1; + UPDATE %s SET k=4 WHERE id=1; + END; + `, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName)) + e2e.EnvNoError(s.t, env, err) + s.t.Log("Executed a transaction touching toast columns") + e2e.EnvWaitForEqualTables(env, s, "normalizing tx", "test_toast_sf_4", `id,t1,k`) + env.Cancel() - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuiteSF) Test_Toast_Advance_3_SF() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) srcTableName := s.attachSchemaSuffix("test_toast_sf_5") dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_toast_sf_5") @@ -506,36 +494,34 @@ func (s PeerFlowE2ETestSuiteSF) Test_Toast_Advance_3_SF() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and execute a transaction touching toast columns - go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - /* - transaction 
updating a single row - multiple times with changed/unchanged toast columns - */ - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` - BEGIN; - INSERT INTO %s (t1,t2,k) SELECT random_string(9000),random_string(9000), - 1 FROM generate_series(1,1); - UPDATE %s SET k=102 WHERE id=1; - UPDATE %s SET t1='dummy' WHERE id=1; - UPDATE %s SET t2='dummy' WHERE id=1; - END; - `, srcTableName, srcTableName, srcTableName, srcTableName)) - e2e.EnvNoError(s.t, env, err) - s.t.Log("Executed a transaction touching toast columns") + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + /* + transaction updating a single row + multiple times with changed/unchanged toast columns + */ + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + BEGIN; + INSERT INTO %s (t1,t2,k) SELECT random_string(9000),random_string(9000), + 1 FROM generate_series(1,1); + UPDATE %s SET k=102 WHERE id=1; + UPDATE %s SET t1='dummy' WHERE id=1; + UPDATE %s SET t2='dummy' WHERE id=1; + END; + `, srcTableName, srcTableName, srcTableName, srcTableName)) + e2e.EnvNoError(s.t, env, err) + s.t.Log("Executed a transaction touching toast columns") + + e2e.EnvWaitForEqualTables(env, s, "normalizing tx", "test_toast_sf_5", `id,t1,t2,k`) + env.Cancel() - e2e.EnvWaitForEqualTables(env, s, "normalizing tx", "test_toast_sf_5", `id,t1,t2,k`) - env.CancelWorkflow() - }() - - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuiteSF) Test_Types_SF() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) srcTableName := s.attachSchemaSuffix("test_types_sf") dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_types_sf") @@ -566,71 +552,69 @@ func (s PeerFlowE2ETestSuiteSF) Test_Types_SF() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and execute a transaction touching toast columns - go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - /* test inserting various types*/ - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` - INSERT INTO %s SELECT 2,2,b'1',b'101', - true,random_bytea(32),'s','test','1.1.10.2'::cidr, - CURRENT_DATE,1.23,1.234,'192.168.1.5'::inet,1, - '5 years 2 months 29 days 1 minute 2 seconds 200 milliseconds 20000 microseconds'::interval, - '{"sai":1}'::json,'{"sai":1}'::jsonb,'08:00:2b:01:02:03'::macaddr, - 1.2,100.24553,4::oid,1.23,1,1,1,'test',now(),now(),now()::time,now()::timetz, - 'fat & rat'::tsquery,'a fat cat sat on a mat and ate a fat rat'::tsvector, - txid_current_snapshot(), - '66073c38-b8df-4bdb-bbca-1c97596b8940'::uuid,xmlcomment('hello'), - 'POINT(1 2)','POINT(40.7128 -74.0060)','POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))', - 'LINESTRING(-74.0060 40.7128, -73.9352 40.7306, -73.9123 40.7831)','LINESTRING(0 0, 1 1, 2 2)', - 'POLYGON((-74.0060 40.7128, -73.9352 40.7306, -73.9123 40.7831, -74.0060 40.7128))', 'happy','"a"=>"a\"quote\"", "b"=>NULL', - '{2020-01-01, 2020-01-02}'::date[], - '{"2020-01-01 01:01:01+00", "2020-01-02 01:01:01+00"}'::timestamptz[], - '{"2020-01-01 01:01:01", "2020-01-02 01:01:01"}'::timestamp[], - '{true, false}'::boolean[], - '{1,2}'::smallint[]; - `, srcTableName)) - e2e.EnvNoError(s.t, env, err) - - e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize types", func() bool { - 
noNulls, err := s.sfHelper.CheckNull("test_types_sf", []string{ - "c41", "c1", "c2", "c3", "c4", - "c6", "c39", "c40", "id", "c9", "c11", "c12", "c13", "c14", "c15", "c16", "c17", "c18", - "c21", "c22", "c23", "c24", "c28", "c29", "c30", "c31", "c33", "c34", "c35", "c36", - "c37", "c38", "c7", "c8", "c32", "c42", "c43", "c44", "c45", "c46", "c47", "c48", "c49", - "c50", "c51", "c52", "c53", "c54", - }) - if err != nil { - s.t.Log(err) - return false - } + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + /* test inserting various types*/ + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + INSERT INTO %s SELECT 2,2,b'1',b'101', + true,random_bytea(32),'s','test','1.1.10.2'::cidr, + CURRENT_DATE,1.23,1.234,'192.168.1.5'::inet,1, + '5 years 2 months 29 days 1 minute 2 seconds 200 milliseconds 20000 microseconds'::interval, + '{"sai":1}'::json,'{"sai":1}'::jsonb,'08:00:2b:01:02:03'::macaddr, + 1.2,100.24553,4::oid,1.23,1,1,1,'test',now(),now(),now()::time,now()::timetz, + 'fat & rat'::tsquery,'a fat cat sat on a mat and ate a fat rat'::tsvector, + txid_current_snapshot(), + '66073c38-b8df-4bdb-bbca-1c97596b8940'::uuid,xmlcomment('hello'), + 'POINT(1 2)','POINT(40.7128 -74.0060)','POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))', + 'LINESTRING(-74.0060 40.7128, -73.9352 40.7306, -73.9123 40.7831)','LINESTRING(0 0, 1 1, 2 2)', + 'POLYGON((-74.0060 40.7128, -73.9352 40.7306, -73.9123 40.7831, -74.0060 40.7128))', 'happy','"a"=>"a\"quote\"", "b"=>NULL', + '{2020-01-01, 2020-01-02}'::date[], + '{"2020-01-01 01:01:01+00", "2020-01-02 01:01:01+00"}'::timestamptz[], + '{"2020-01-01 01:01:01", "2020-01-02 01:01:01"}'::timestamp[], + '{true, false}'::boolean[], + '{1,2}'::smallint[]; + `, srcTableName)) + e2e.EnvNoError(s.t, env, err) + + e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize types", func() bool { + noNulls, err := s.sfHelper.CheckNull("test_types_sf", []string{ + "c41", "c1", "c2", "c3", "c4", + "c6", "c39", "c40", "id", "c9", "c11", "c12", "c13", "c14", "c15", "c16", "c17", "c18", + "c21", "c22", "c23", "c24", "c28", "c29", "c30", "c31", "c33", "c34", "c35", "c36", + "c37", "c38", "c7", "c8", "c32", "c42", "c43", "c44", "c45", "c46", "c47", "c48", "c49", + "c50", "c51", "c52", "c53", "c54", + }) + if err != nil { + s.t.Log(err) + return false + } - // check if JSON on snowflake side is a good JSON - if err := s.checkJSONValue(dstTableName, "c17", "sai", "1"); err != nil { - return false - } + // check if JSON on snowflake side is a good JSON + if err := s.checkJSONValue(dstTableName, "c17", "sai", "1"); err != nil { + return false + } - // check if HSTORE on snowflake is a good JSON - if err := s.checkJSONValue(dstTableName, "c49", "a", `"a\"quote\""`); err != nil { - return false - } + // check if HSTORE on snowflake is a good JSON + if err := s.checkJSONValue(dstTableName, "c49", "a", `"a\"quote\""`); err != nil { + return false + } - if err := s.checkJSONValue(dstTableName, "c49", "b", "null"); err != nil { - return false - } + if err := s.checkJSONValue(dstTableName, "c49", "b", "null"); err != nil { + return false + } - return noNulls - }) + return noNulls + }) - env.CancelWorkflow() - }() + env.Cancel() - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuiteSF) Test_Multi_Table_SF() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) srcTable1Name := s.attachSchemaSuffix("test1_sf") 
srcTable2Name := s.attachSchemaSuffix("test2_sf") @@ -652,39 +636,37 @@ func (s PeerFlowE2ETestSuiteSF) Test_Multi_Table_SF() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and execute a transaction touching toast columns - go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - /* inserting across multiple tables*/ - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` - INSERT INTO %s (c1,c2) VALUES (1,'dummy_1'); - INSERT INTO %s (c1,c2) VALUES (-1,'dummy_-1'); - `, srcTable1Name, srcTable2Name)) - e2e.EnvNoError(s.t, env, err) + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + /* inserting across multiple tables*/ + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + INSERT INTO %s (c1,c2) VALUES (1,'dummy_1'); + INSERT INTO %s (c1,c2) VALUES (-1,'dummy_-1'); + `, srcTable1Name, srcTable2Name)) + e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize both tables", func() bool { - count1, err := s.sfHelper.CountRows("test1_sf") - if err != nil { - return false - } - count2, err := s.sfHelper.CountRows("test2_sf") - if err != nil { - return false - } + e2e.EnvWaitFor(s.t, env, 2*time.Minute, "normalize both tables", func() bool { + count1, err := s.sfHelper.CountRows("test1_sf") + if err != nil { + return false + } + count2, err := s.sfHelper.CountRows("test2_sf") + if err != nil { + return false + } - return count1 == 1 && count2 == 1 - }) + return count1 == 1 && count2 == 1 + }) - env.CancelWorkflow() - }() + env.Cancel() - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) srcTableName := s.attachSchemaSuffix("test_simple_schema_changes") dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_simple_schema_changes") @@ -706,199 +688,197 @@ func (s PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and then insert and mutate schema repeatedly. 
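
In the schema-change test below, note that the expected destination schemas are cumulative: columns dropped on the source (C2, then C3) stay in the destination schema, since the mirror only ever adds destination columns. The `CompareTableSchemas` assertions therefore amount to an order-insensitive column comparison, roughly like this sketch (assumed shape with simplified local types; the real helper lives in the e2e package and compares protos messages):

    package main

    import "fmt"

    type FieldDescription struct {
    	Name string
    	Type string
    }

    type TableSchema struct {
    	TableIdentifier string
    	Columns         []*FieldDescription
    }

    // compareSchemas checks that both schemas describe the same columns,
    // ignoring column order, which normalization does not guarantee.
    func compareSchemas(want, got *TableSchema) bool {
    	if want.TableIdentifier != got.TableIdentifier || len(want.Columns) != len(got.Columns) {
    		return false
    	}
    	byName := make(map[string]string, len(got.Columns))
    	for _, c := range got.Columns {
    		byName[c.Name] = c.Type
    	}
    	for _, c := range want.Columns {
    		if typ, ok := byName[c.Name]; !ok || typ != c.Type {
    			return false
    		}
    	}
    	return true
    }

    func main() {
    	a := &TableSchema{TableIdentifier: "T", Columns: []*FieldDescription{{Name: "ID", Type: "numeric"}, {Name: "C1", Type: "numeric"}}}
    	b := &TableSchema{TableIdentifier: "T", Columns: []*FieldDescription{{Name: "C1", Type: "numeric"}, {Name: "ID", Type: "numeric"}}}
    	fmt.Println(compareSchemas(a, b)) // true
    }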
- go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1) VALUES ($1)`, srcTableName), 1) - e2e.EnvNoError(s.t, env, err) - s.t.Log("Inserted initial row in the source table") - - e2e.EnvWaitForEqualTables(env, s, "normalize reinsert", "test_simple_schema_changes", "id,c1") - - expectedTableSchema := &protos.TableSchema{ - TableIdentifier: strings.ToUpper(dstTableName), - Columns: []*protos.FieldDescription{ - { - Name: "ID", - Type: string(qvalue.QValueKindNumeric), - TypeModifier: -1, - }, - { - Name: "C1", - Type: string(qvalue.QValueKindNumeric), - TypeModifier: -1, - }, - { - Name: "_PEERDB_IS_DELETED", - Type: string(qvalue.QValueKindBoolean), - TypeModifier: -1, - }, - { - Name: "_PEERDB_SYNCED_AT", - Type: string(qvalue.QValueKindTimestamp), - TypeModifier: -1, - }, + e2e.EnvNoError(s.t, env, err) + s.t.Log("Inserted initial row in the source table") + + e2e.EnvWaitForEqualTables(env, s, "normalize reinsert", "test_simple_schema_changes", "id,c1") + + expectedTableSchema := &protos.TableSchema{ + TableIdentifier: strings.ToUpper(dstTableName), + Columns: []*protos.FieldDescription{ + { + Name: "ID", + Type: string(qvalue.QValueKindNumeric), + TypeModifier: -1, }, - } - output, err := s.connector.GetTableSchema(context.Background(), &protos.GetTableSchemaBatchInput{ - TableIdentifiers: []string{dstTableName}, - }) - e2e.EnvNoError(s.t, env, err) - e2e.EnvTrue(s.t, env, e2e.CompareTableSchemas(expectedTableSchema, output.TableNameSchemaMapping[dstTableName])) + { + Name: "C1", + Type: string(qvalue.QValueKindNumeric), + TypeModifier: -1, + }, + { + Name: "_PEERDB_IS_DELETED", + Type: string(qvalue.QValueKindBoolean), + TypeModifier: -1, + }, + { + Name: "_PEERDB_SYNCED_AT", + Type: string(qvalue.QValueKindTimestamp), + TypeModifier: -1, + }, + }, + } + output, err := s.connector.GetTableSchema(context.Background(), &protos.GetTableSchemaBatchInput{ + TableIdentifiers: []string{dstTableName}, + }) + e2e.EnvNoError(s.t, env, err) + e2e.EnvTrue(s.t, env, e2e.CompareTableSchemas(expectedTableSchema, output.TableNameSchemaMapping[dstTableName])) - // alter source table, add column c2 and insert another row. - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + // alter source table, add column c2 and insert another row. + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` ALTER TABLE %s ADD COLUMN c2 BIGINT`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - s.t.Log("Altered source table, added column c2") - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + s.t.Log("Altered source table, added column c2") + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2) VALUES ($1,$2)`, srcTableName), 2, 2) - e2e.EnvNoError(s.t, env, err) - s.t.Log("Inserted row with added c2 in the source table") - - // verify we got our two rows, if schema did not match up it will error. 
- e2e.EnvWaitForEqualTables(env, s, "normalize altered row", "test_simple_schema_changes", "id,c1,c2") - expectedTableSchema = &protos.TableSchema{ - TableIdentifier: strings.ToUpper(dstTableName), - Columns: []*protos.FieldDescription{ - { - Name: "ID", - Type: string(qvalue.QValueKindNumeric), - TypeModifier: -1, - }, - { - Name: "C1", - Type: string(qvalue.QValueKindNumeric), - TypeModifier: -1, - }, - { - Name: "_PEERDB_SYNCED_AT", - Type: string(qvalue.QValueKindTimestamp), - TypeModifier: -1, - }, - { - Name: "C2", - Type: string(qvalue.QValueKindNumeric), - TypeModifier: -1, - }, + e2e.EnvNoError(s.t, env, err) + s.t.Log("Inserted row with added c2 in the source table") + + // verify we got our two rows, if schema did not match up it will error. + e2e.EnvWaitForEqualTables(env, s, "normalize altered row", "test_simple_schema_changes", "id,c1,c2") + expectedTableSchema = &protos.TableSchema{ + TableIdentifier: strings.ToUpper(dstTableName), + Columns: []*protos.FieldDescription{ + { + Name: "ID", + Type: string(qvalue.QValueKindNumeric), + TypeModifier: -1, }, - } - output, err = s.connector.GetTableSchema(context.Background(), &protos.GetTableSchemaBatchInput{ - TableIdentifiers: []string{dstTableName}, - }) - e2e.EnvNoError(s.t, env, err) - e2e.EnvTrue(s.t, env, e2e.CompareTableSchemas(expectedTableSchema, output.TableNameSchemaMapping[dstTableName])) - e2e.EnvEqualTables(env, s, "test_simple_schema_changes", "id,c1,c2") + { + Name: "C1", + Type: string(qvalue.QValueKindNumeric), + TypeModifier: -1, + }, + { + Name: "_PEERDB_SYNCED_AT", + Type: string(qvalue.QValueKindTimestamp), + TypeModifier: -1, + }, + { + Name: "C2", + Type: string(qvalue.QValueKindNumeric), + TypeModifier: -1, + }, + }, + } + output, err = s.connector.GetTableSchema(context.Background(), &protos.GetTableSchemaBatchInput{ + TableIdentifiers: []string{dstTableName}, + }) + e2e.EnvNoError(s.t, env, err) + e2e.EnvTrue(s.t, env, e2e.CompareTableSchemas(expectedTableSchema, output.TableNameSchemaMapping[dstTableName])) + e2e.EnvEqualTables(env, s, "test_simple_schema_changes", "id,c1,c2") - // alter source table, add column c3, drop column c2 and insert another row. - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + // alter source table, add column c3, drop column c2 and insert another row. + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` ALTER TABLE %s DROP COLUMN c2, ADD COLUMN c3 BIGINT`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - s.t.Log("Altered source table, dropped column c2 and added column c3") - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + s.t.Log("Altered source table, dropped column c2 and added column c3") + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c3) VALUES ($1,$2)`, srcTableName), 3, 3) - e2e.EnvNoError(s.t, env, err) - s.t.Log("Inserted row with added c3 in the source table") - - // verify we got our two rows, if schema did not match up it will error. 
- e2e.EnvWaitForEqualTables(env, s, "normalize dropped c2 column", "test_simple_schema_changes", "id,c1,c3") - expectedTableSchema = &protos.TableSchema{ - TableIdentifier: strings.ToUpper(dstTableName), - Columns: []*protos.FieldDescription{ - { - Name: "ID", - Type: string(qvalue.QValueKindNumeric), - TypeModifier: -1, - }, - { - Name: "C1", - Type: string(qvalue.QValueKindNumeric), - TypeModifier: -1, - }, - { - Name: "_PEERDB_SYNCED_AT", - Type: string(qvalue.QValueKindTimestamp), - TypeModifier: -1, - }, - { - Name: "C2", - Type: string(qvalue.QValueKindNumeric), - TypeModifier: -1, - }, - { - Name: "C3", - Type: string(qvalue.QValueKindNumeric), - TypeModifier: -1, - }, + e2e.EnvNoError(s.t, env, err) + s.t.Log("Inserted row with added c3 in the source table") + + // verify we got our two rows, if schema did not match up it will error. + e2e.EnvWaitForEqualTables(env, s, "normalize dropped c2 column", "test_simple_schema_changes", "id,c1,c3") + expectedTableSchema = &protos.TableSchema{ + TableIdentifier: strings.ToUpper(dstTableName), + Columns: []*protos.FieldDescription{ + { + Name: "ID", + Type: string(qvalue.QValueKindNumeric), + TypeModifier: -1, }, - } - output, err = s.connector.GetTableSchema(context.Background(), &protos.GetTableSchemaBatchInput{ - TableIdentifiers: []string{dstTableName}, - }) - e2e.EnvNoError(s.t, env, err) - e2e.EnvTrue(s.t, env, e2e.CompareTableSchemas(expectedTableSchema, output.TableNameSchemaMapping[dstTableName])) - e2e.EnvEqualTables(env, s, "test_simple_schema_changes", "id,c1,c3") + { + Name: "C1", + Type: string(qvalue.QValueKindNumeric), + TypeModifier: -1, + }, + { + Name: "_PEERDB_SYNCED_AT", + Type: string(qvalue.QValueKindTimestamp), + TypeModifier: -1, + }, + { + Name: "C2", + Type: string(qvalue.QValueKindNumeric), + TypeModifier: -1, + }, + { + Name: "C3", + Type: string(qvalue.QValueKindNumeric), + TypeModifier: -1, + }, + }, + } + output, err = s.connector.GetTableSchema(context.Background(), &protos.GetTableSchemaBatchInput{ + TableIdentifiers: []string{dstTableName}, + }) + e2e.EnvNoError(s.t, env, err) + e2e.EnvTrue(s.t, env, e2e.CompareTableSchemas(expectedTableSchema, output.TableNameSchemaMapping[dstTableName])) + e2e.EnvEqualTables(env, s, "test_simple_schema_changes", "id,c1,c3") - // alter source table, drop column c3 and insert another row. - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + // alter source table, drop column c3 and insert another row. + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` ALTER TABLE %s DROP COLUMN c3`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - s.t.Log("Altered source table, dropped column c3") - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + s.t.Log("Altered source table, dropped column c3") + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1) VALUES ($1)`, srcTableName), 4) - e2e.EnvNoError(s.t, env, err) - s.t.Log("Inserted row after dropping all columns in the source table") - - // verify we got our two rows, if schema did not match up it will error. 
- e2e.EnvWaitForEqualTables(env, s, "normalize dropped c3 column", "test_simple_schema_changes", "id,c1") - expectedTableSchema = &protos.TableSchema{ - TableIdentifier: strings.ToUpper(dstTableName), - Columns: []*protos.FieldDescription{ - { - Name: "ID", - Type: string(qvalue.QValueKindNumeric), - TypeModifier: -1, - }, - { - Name: "C1", - Type: string(qvalue.QValueKindNumeric), - TypeModifier: -1, - }, - { - Name: "_PEERDB_SYNCED_AT", - Type: string(qvalue.QValueKindTimestamp), - TypeModifier: -1, - }, - { - Name: "C2", - Type: string(qvalue.QValueKindNumeric), - TypeModifier: -1, - }, - { - Name: "C3", - Type: string(qvalue.QValueKindNumeric), - TypeModifier: -1, - }, + e2e.EnvNoError(s.t, env, err) + s.t.Log("Inserted row after dropping all columns in the source table") + + // verify we got our two rows, if schema did not match up it will error. + e2e.EnvWaitForEqualTables(env, s, "normalize dropped c3 column", "test_simple_schema_changes", "id,c1") + expectedTableSchema = &protos.TableSchema{ + TableIdentifier: strings.ToUpper(dstTableName), + Columns: []*protos.FieldDescription{ + { + Name: "ID", + Type: string(qvalue.QValueKindNumeric), + TypeModifier: -1, }, - } - output, err = s.connector.GetTableSchema(context.Background(), &protos.GetTableSchemaBatchInput{ - TableIdentifiers: []string{dstTableName}, - }) - e2e.EnvNoError(s.t, env, err) - e2e.EnvTrue(s.t, env, e2e.CompareTableSchemas(expectedTableSchema, output.TableNameSchemaMapping[dstTableName])) - e2e.EnvEqualTables(env, s, "test_simple_schema_changes", "id,c1") + { + Name: "C1", + Type: string(qvalue.QValueKindNumeric), + TypeModifier: -1, + }, + { + Name: "_PEERDB_SYNCED_AT", + Type: string(qvalue.QValueKindTimestamp), + TypeModifier: -1, + }, + { + Name: "C2", + Type: string(qvalue.QValueKindNumeric), + TypeModifier: -1, + }, + { + Name: "C3", + Type: string(qvalue.QValueKindNumeric), + TypeModifier: -1, + }, + }, + } + output, err = s.connector.GetTableSchema(context.Background(), &protos.GetTableSchemaBatchInput{ + TableIdentifiers: []string{dstTableName}, + }) + e2e.EnvNoError(s.t, env, err) + e2e.EnvTrue(s.t, env, e2e.CompareTableSchemas(expectedTableSchema, output.TableNameSchemaMapping[dstTableName])) + e2e.EnvEqualTables(env, s, "test_simple_schema_changes", "id,c1") - env.CancelWorkflow() - }() + env.Cancel() - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuiteSF) Test_Composite_PKey_SF() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) srcTableName := s.attachSchemaSuffix("test_simple_cpkey") dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_simple_cpkey") @@ -923,38 +903,36 @@ func (s PeerFlowE2ETestSuiteSF) Test_Composite_PKey_SF() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. 
- go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - // insert 10 rows into the source table - for i := range 10 { - testValue := fmt.Sprintf("test_value_%d", i) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + // insert 10 rows into the source table + for i := range 10 { + testValue := fmt.Sprintf("test_value_%d", i) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c2,t) VALUES ($1,$2) `, srcTableName), i, testValue) - e2e.EnvNoError(s.t, env, err) - } - s.t.Log("Inserted 10 rows into the source table") + e2e.EnvNoError(s.t, env, err) + } + s.t.Log("Inserted 10 rows into the source table") - e2e.EnvWaitForEqualTables(env, s, "normalize table", "test_simple_cpkey", "id,c1,c2,t") + e2e.EnvWaitForEqualTables(env, s, "normalize table", "test_simple_cpkey", "id,c1,c2,t") - _, err := s.Conn().Exec(context.Background(), - fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) - e2e.EnvNoError(s.t, env, err) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) - e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTables(env, s, "normalize update/delete", "test_simple_cpkey", "id,c1,c2,t") + _, err = s.Conn().Exec(context.Background(), + fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) + e2e.EnvNoError(s.t, env, err) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) + e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitForEqualTables(env, s, "normalize update/delete", "test_simple_cpkey", "id,c1,c2,t") - env.CancelWorkflow() - }() + env.Cancel() - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_1_SF() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) srcTableName := s.attachSchemaSuffix("test_cpkey_toast1") dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_cpkey_toast1") @@ -980,42 +958,40 @@ func (s PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_1_SF() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. 
- go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - rowsTx, err := s.Conn().Begin(context.Background()) - e2e.EnvNoError(s.t, env, err) - - // insert 10 rows into the source table - for i := range 10 { - testValue := fmt.Sprintf("test_value_%d", i) - _, err = rowsTx.Exec(context.Background(), fmt.Sprintf(` + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + rowsTx, err := s.Conn().Begin(context.Background()) + e2e.EnvNoError(s.t, env, err) + + // insert 10 rows into the source table + for i := range 10 { + testValue := fmt.Sprintf("test_value_%d", i) + _, err = rowsTx.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c2,t,t2) VALUES ($1,$2,random_string(9000)) `, srcTableName), i, testValue) - e2e.EnvNoError(s.t, env, err) - } - s.t.Log("Inserted 10 rows into the source table") - - _, err = rowsTx.Exec(context.Background(), - fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) - e2e.EnvNoError(s.t, env, err) - _, err = rowsTx.Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) e2e.EnvNoError(s.t, env, err) + } + s.t.Log("Inserted 10 rows into the source table") - err = rowsTx.Commit(context.Background()) - e2e.EnvNoError(s.t, env, err) + _, err = rowsTx.Exec(context.Background(), + fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) + e2e.EnvNoError(s.t, env, err) + _, err = rowsTx.Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) + e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTables(env, s, "normalizing tx", "test_cpkey_toast1", "id,c1,c2,t,t2") - env.CancelWorkflow() - }() + err = rowsTx.Commit(context.Background()) + e2e.EnvNoError(s.t, env, err) + + e2e.EnvWaitForEqualTables(env, s, "normalizing tx", "test_cpkey_toast1", "id,c1,c2,t,t2") + env.Cancel() - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_2_SF() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) tableName := "test_cpkey_toast2" srcTableName := s.attachSchemaSuffix(tableName) @@ -1042,38 +1018,36 @@ func (s PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_2_SF() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. 
- go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - // insert 10 rows into the source table - for i := range 10 { - testValue := fmt.Sprintf("test_value_%d", i) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + // insert 10 rows into the source table + for i := range 10 { + testValue := fmt.Sprintf("test_value_%d", i) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c2,t,t2) VALUES ($1,$2,random_string(9000)) `, srcTableName), i, testValue) - e2e.EnvNoError(s.t, env, err) - } - s.t.Log("Inserted 10 rows into the source table") - - e2e.EnvWaitForEqualTables(env, s, "normalize table", tableName, "id,c2,t,t2") - _, err = s.Conn().Exec(context.Background(), - fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) - e2e.EnvNoError(s.t, env, err) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTables(env, s, "normalize update/delete", tableName, "id,c2,t,t2") + } + s.t.Log("Inserted 10 rows into the source table") + + e2e.EnvWaitForEqualTables(env, s, "normalize table", tableName, "id,c2,t,t2") + _, err = s.Conn().Exec(context.Background(), + fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) + e2e.EnvNoError(s.t, env, err) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) + e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitForEqualTables(env, s, "normalize update/delete", tableName, "id,c2,t,t2") - env.CancelWorkflow() - }() + env.Cancel() - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuiteSF) Test_Column_Exclusion() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) tableName := "test_exclude_sf" srcTableName := s.attachSchemaSuffix(tableName) @@ -1111,33 +1085,31 @@ func (s PeerFlowE2ETestSuiteSF) Test_Column_Exclusion() { MaxBatchSize: 100, } - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. 
- go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, config, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - // insert 10 rows into the source table - for i := range 10 { - testValue := fmt.Sprintf("test_value_%d", i) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + // insert 10 rows into the source table + for i := range 10 { + testValue := fmt.Sprintf("test_value_%d", i) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c2,t,t2) VALUES ($1,$2,random_string(100)) `, srcTableName), i, testValue) - e2e.EnvNoError(s.t, env, err) - } - s.t.Log("Inserted 10 rows into the source table") - - e2e.EnvWaitForEqualTables(env, s, "normalize table", tableName, "id,c1,t,t2") - _, err = s.Conn().Exec(context.Background(), - fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=0`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTables(env, s, "normalize update/delete", tableName, "id,c1,t,t2") + } + s.t.Log("Inserted 10 rows into the source table") + + e2e.EnvWaitForEqualTables(env, s, "normalize table", tableName, "id,c1,t,t2") + _, err = s.Conn().Exec(context.Background(), + fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=1`, srcTableName)) + e2e.EnvNoError(s.t, env, err) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=0`, srcTableName)) + e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitForEqualTables(env, s, "normalize update/delete", tableName, "id,c1,t,t2") - env.CancelWorkflow() - }() + env.Cancel() - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, config, nil) e2e.RequireEnvCanceled(s.t, env) sfRows, err := s.GetRows(tableName, "*") @@ -1150,7 +1122,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Column_Exclusion() { } func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Basic() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) tableName := "test_softdel_src" dstName := "test_softdel" @@ -1188,35 +1160,33 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Basic() { MaxBatchSize: 100, } - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. 
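
For the column-exclusion test above, the exclusion entry for c2 means that column is dropped from the synced schema entirely: the test asserts equality on `id,c1,t,t2` and then fetches `*` from the destination to inspect the resulting column set. A small sketch of applying such an exclusion list to a column set (illustrative only; PeerDB's real filtering happens in the flow connectors and may differ):

    package main

    import "fmt"

    // excludeColumns drops every column named in the exclusion set,
    // preserving the order of the remaining columns.
    func excludeColumns(cols []string, exclude map[string]struct{}) []string {
    	out := make([]string, 0, len(cols))
    	for _, c := range cols {
    		if _, skip := exclude[c]; !skip {
    			out = append(out, c)
    		}
    	}
    	return out
    }

    func main() {
    	cols := []string{"id", "c1", "c2", "t", "t2"}
    	fmt.Println(excludeColumns(cols, map[string]struct{}{"c2": {}})) // [id c1 t t2]
    }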
- go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, config, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize row", tableName, dstName, "id,c1,c2,t") - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize row", tableName, dstName, "id,c1,c2,t") + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` UPDATE %s SET c1=c1+4 WHERE id=1`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize update", tableName, dstName, "id,c1,c2,t") - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize update", tableName, dstName, "id,c1,c2,t") + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTablesWithNames( - env, - s, - "normalize delete", - tableName, - dstName+" WHERE NOT _PEERDB_IS_DELETED", - "id,c1,c2,t", - ) + e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitForEqualTablesWithNames( + env, + s, + "normalize delete", + tableName, + dstName+" WHERE NOT _PEERDB_IS_DELETED", + "id,c1,c2,t", + ) - env.CancelWorkflow() - }() + env.Cancel() - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, config, nil) e2e.RequireEnvCanceled(s.t, env) newerSyncedAtQuery := fmt.Sprintf(` @@ -1227,7 +1197,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Basic() { } func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_IUD_Same_Batch() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) cmpTableName := s.attachSchemaSuffix("test_softdel_iud") srcTableName := cmpTableName + "_src" @@ -1264,48 +1234,46 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_IUD_Same_Batch() { MaxBatchSize: 100, } - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. 
- go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, config, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - insertTx, err := s.Conn().Begin(context.Background()) - e2e.EnvNoError(s.t, env, err) + insertTx, err := s.Conn().Begin(context.Background()) + e2e.EnvNoError(s.t, env, err) - _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` + _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` UPDATE %s SET c1=c1+4 WHERE id=1`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - // since we delete stuff, create another table to compare with - _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + // since we delete stuff, create another table to compare with + _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` CREATE TABLE %s AS SELECT * FROM %s`, cmpTableName, srcTableName)) - e2e.EnvNoError(s.t, env, err) - _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) - e2e.EnvNoError(s.t, env, err) + e2e.EnvNoError(s.t, env, err) - e2e.EnvNoError(s.t, env, insertTx.Commit(context.Background())) + e2e.EnvNoError(s.t, env, insertTx.Commit(context.Background())) - e2e.EnvWaitForEqualTables(env, s, "normalizing tx", "test_softdel_iud", "id,c1,c2,t") - e2e.EnvWaitFor(s.t, env, 3*time.Minute, "checking soft delete", func() bool { - newerSyncedAtQuery := fmt.Sprintf(` + e2e.EnvWaitForEqualTables(env, s, "normalizing tx", "test_softdel_iud", "id,c1,c2,t") + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "checking soft delete", func() bool { + newerSyncedAtQuery := fmt.Sprintf(` SELECT COUNT(*) FROM %s WHERE _PEERDB_IS_DELETED`, dstTableName) - numNewRows, err := s.sfHelper.RunIntQuery(newerSyncedAtQuery) - e2e.EnvNoError(s.t, env, err) - return numNewRows == 1 - }) + numNewRows, err := s.sfHelper.RunIntQuery(newerSyncedAtQuery) + e2e.EnvNoError(s.t, env, err) + return numNewRows == 1 + }) - env.CancelWorkflow() - }() + env.Cancel() - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, config, nil) e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_UD_Same_Batch() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) tableName := "test_softdel_ud_src" dstName := "test_softdel_ud" @@ -1343,54 +1311,52 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_UD_Same_Batch() { MaxBatchSize: 100, } - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and then insert, update and delete rows in the table. 
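
The UD-in-same-batch test that follows is the subtle one: the update and the delete commit in a single transaction, so both changes arrive in one sync batch, and the destination row must end up flagged `_PEERDB_IS_DELETED` while still carrying the updated column values. A toy reduction over a batch illustrates the required semantics (an illustration of the invariant only, not PeerDB's actual merge logic):

    package main

    import "fmt"

    type change struct {
    	kind string         // "insert", "update", or "delete"
    	vals map[string]int // column values carried by the change
    }

    // reduce collapses one batch of changes for a single primary key:
    // inserts and updates overwrite the values, a delete only sets the
    // soft-delete flag, so the final values survive a same-batch delete.
    func reduce(batch []change) (vals map[string]int, deleted bool) {
    	for _, c := range batch {
    		switch c.kind {
    		case "insert", "update":
    			vals = c.vals
    		case "delete":
    			deleted = true
    		}
    	}
    	return vals, deleted
    }

    func main() {
    	batch := []change{
    		{kind: "update", vals: map[string]int{"c1": 5, "c2": 2}},
    		{kind: "delete"},
    	}
    	vals, deleted := reduce(batch)
    	fmt.Println(vals, deleted) // map[c1:5 c2:2] true
    }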
- go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, config, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize insert", tableName, dstName, "id,c1,c2,t") + e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize insert", tableName, dstName, "id,c1,c2,t") - insertTx, err := s.Conn().Begin(context.Background()) - e2e.EnvNoError(s.t, env, err) - _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` + insertTx, err := s.Conn().Begin(context.Background()) + e2e.EnvNoError(s.t, env, err) + _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` UPDATE %s SET t=random_string(10000) WHERE id=1`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` UPDATE %s SET c1=c1+4 WHERE id=1`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - - e2e.EnvNoError(s.t, env, insertTx.Commit(context.Background())) - e2e.EnvWaitForEqualTablesWithNames( - env, - s, - "normalize transaction", - tableName, - dstName+" WHERE NOT _PEERDB_IS_DELETED", - "id,c1,c2,t", - ) - e2e.EnvWaitFor(s.t, env, 3*time.Minute, "checking soft delete", func() bool { - newerSyncedAtQuery := fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + + e2e.EnvNoError(s.t, env, insertTx.Commit(context.Background())) + e2e.EnvWaitForEqualTablesWithNames( + env, + s, + "normalize transaction", + tableName, + dstName+" WHERE NOT _PEERDB_IS_DELETED", + "id,c1,c2,t", + ) + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "checking soft delete", func() bool { + newerSyncedAtQuery := fmt.Sprintf(` SELECT COUNT(*) FROM %s WHERE _PEERDB_IS_DELETED`, dstTableName) - numNewRows, err := s.sfHelper.RunIntQuery(newerSyncedAtQuery) - e2e.EnvNoError(s.t, env, err) - return numNewRows == 1 - }) + numNewRows, err := s.sfHelper.RunIntQuery(newerSyncedAtQuery) + e2e.EnvNoError(s.t, env, err) + return numNewRows == 1 + }) - env.CancelWorkflow() - }() + env.Cancel() - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, config, nil) e2e.RequireEnvCanceled(s.t, env) } func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Insert_After_Delete() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) tableName := "test_softdel_iad" srcTableName := s.attachSchemaSuffix(tableName) @@ -1427,36 +1393,34 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Insert_After_Delete() { MaxBatchSize: 100, } - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and then insert and delete rows in the table. 
- go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, config, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTables(env, s, "normalize row", tableName, "id,c1,c2,t") - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitForEqualTables(env, s, "normalize row", tableName, "id,c1,c2,t") + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` DELETE FROM %s WHERE id=1`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTablesWithNames( - env, - s, - "normalize delete", - tableName, - tableName+" WHERE NOT _PEERDB_IS_DELETED", - "id,c1,c2,t", - ) + e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitForEqualTablesWithNames( + env, + s, + "normalize delete", + tableName, + tableName+" WHERE NOT _PEERDB_IS_DELETED", + "id,c1,c2,t", + ) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(id,c1,c2,t) VALUES (1,3,4,random_string(10000))`, srcTableName)) - e2e.EnvNoError(s.t, env, err) - e2e.EnvWaitForEqualTables(env, s, "normalize reinsert", tableName, "id,c1,c2,t") + e2e.EnvNoError(s.t, env, err) + e2e.EnvWaitForEqualTables(env, s, "normalize reinsert", tableName, "id,c1,c2,t") - env.CancelWorkflow() - }() + env.Cancel() - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, config, nil) e2e.RequireEnvCanceled(s.t, env) newerSyncedAtQuery := fmt.Sprintf(` @@ -1467,7 +1431,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Insert_After_Delete() { } func (s PeerFlowE2ETestSuiteSF) Test_Supported_Mixed_Case_Table_SF() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) srcTableName := s.attachSchemaSuffix("testMixedCase") dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "testMixedCase") @@ -1491,32 +1455,30 @@ func (s PeerFlowE2ETestSuiteSF) Test_Supported_Mixed_Case_Table_SF() { flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() flowConnConfig.MaxBatchSize = 100 - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // wait for PeerFlowStatusQuery to finish setup // and then insert 20 rows into the source table - go func() { - e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) - // insert 20 rows into the source table - for i := range 20 { - testKey := fmt.Sprintf("test_key_%d", i) - testValue := fmt.Sprintf("test_value_%d", i) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` + env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, connectionGen) + // insert 20 rows into the source table + for i := range 20 { + testKey := fmt.Sprintf("test_key_%d", i) + testValue := fmt.Sprintf("test_value_%d", i) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` INSERT INTO e2e_test_%s."%s"("highGold","eVe") VALUES ($1, $2) `, s.pgSuffix, "testMixedCase"), testKey, testValue) - e2e.EnvNoError(s.t, env, err) - } - s.t.Log("Inserted 20 rows into the source table") - e2e.EnvWaitForEqualTablesWithNames( - env, - s, - "normalize mixed case", - "testMixedCase", - "\"testMixedCase\"", - "id,\"pulseArmor\",\"highGold\",\"eVe\"", - ) + e2e.EnvNoError(s.t, env, err) + } + 
s.t.Log("Inserted 20 rows into the source table") + e2e.EnvWaitForEqualTablesWithNames( + env, + s, + "normalize mixed case", + "testMixedCase", + "\"testMixedCase\"", + "id,\"pulseArmor\",\"highGold\",\"eVe\"", + ) - env.CancelWorkflow() - }() + env.Cancel() - env.ExecuteWorkflow(peerflow.CDCFlowWorkflow, flowConnConfig, nil) e2e.RequireEnvCanceled(s.t, env) } diff --git a/flow/e2e/snowflake/qrep_flow_sf_test.go b/flow/e2e/snowflake/qrep_flow_sf_test.go index a662513e6a..7a417238a6 100644 --- a/flow/e2e/snowflake/qrep_flow_sf_test.go +++ b/flow/e2e/snowflake/qrep_flow_sf_test.go @@ -2,6 +2,7 @@ package e2e_snowflake import ( "fmt" + "time" "github.com/google/uuid" "github.com/stretchr/testify/require" @@ -46,7 +47,7 @@ func (s PeerFlowE2ETestSuiteSF) compareTableContentsWithDiffSelectorsSF(tableNam } func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) numRows := 10 @@ -71,13 +72,9 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF() { qrepConfig.SetupWatermarkTableOnDestination = true require.NoError(s.t, err) - e2e.RunQrepFlowWorkflow(env, qrepConfig) - - // Verify workflow completes without error - require.True(s.t, env.IsWorkflowCompleted()) - - err = env.GetWorkflowError() - require.NoError(s.t, err) + env := e2e.RunQrepFlowWorkflow(tc, qrepConfig) + e2e.EnvWaitForFinished(s.t, env, 3*time.Minute) + require.NoError(s.t, env.Error()) sel := e2e.GetOwnersSelectorStringsSF() s.compareTableContentsWithDiffSelectorsSF(tblName, sel[0], sel[1]) @@ -87,7 +84,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF() { } func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_Upsert_Simple() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) numRows := 10 @@ -116,20 +113,16 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_Upsert_Simple() qrepConfig.SetupWatermarkTableOnDestination = true require.NoError(s.t, err) - e2e.RunQrepFlowWorkflow(env, qrepConfig) - - // Verify workflow completes without error - require.True(s.t, env.IsWorkflowCompleted()) - - err = env.GetWorkflowError() - require.NoError(s.t, err) + env := e2e.RunQrepFlowWorkflow(tc, qrepConfig) + e2e.EnvWaitForFinished(s.t, env, 3*time.Minute) + require.NoError(s.t, env.Error()) sel := e2e.GetOwnersSelectorStringsSF() s.compareTableContentsWithDiffSelectorsSF(tblName, sel[0], sel[1]) } func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_S3() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) numRows := 10 @@ -155,20 +148,16 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_S3() { qrepConfig.StagingPath = fmt.Sprintf("s3://peerdb-test-bucket/avro/%s", uuid.New()) qrepConfig.SetupWatermarkTableOnDestination = true - e2e.RunQrepFlowWorkflow(env, qrepConfig) - - // Verify workflow completes without error - require.True(s.t, env.IsWorkflowCompleted()) - - err = env.GetWorkflowError() - require.NoError(s.t, err) + env := e2e.RunQrepFlowWorkflow(tc, qrepConfig) + e2e.EnvWaitForFinished(s.t, env, 3*time.Minute) + require.NoError(s.t, env.Error()) sel := e2e.GetOwnersSelectorStringsSF() s.compareTableContentsWithDiffSelectorsSF(tblName, sel[0], sel[1]) } func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_Upsert_XMIN() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) numRows := 10 @@ -198,20 +187,16 @@ func (s 
PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_Upsert_XMIN() { qrepConfig.SetupWatermarkTableOnDestination = true require.NoError(s.t, err) - e2e.RunXminFlowWorkflow(env, qrepConfig) - - // Verify workflow completes without error - require.True(s.t, env.IsWorkflowCompleted()) - - err = env.GetWorkflowError() - require.NoError(s.t, err) + env := e2e.RunXminFlowWorkflow(tc, qrepConfig) + e2e.EnvWaitForFinished(s.t, env, 3*time.Minute) + require.NoError(s.t, env.Error()) sel := e2e.GetOwnersSelectorStringsSF() s.compareTableContentsWithDiffSelectorsSF(tblName, sel[0], sel[1]) } func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_S3_Integration() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) numRows := 10 @@ -240,20 +225,16 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_S3_Integration() qrepConfig.StagingPath = fmt.Sprintf("s3://peerdb-test-bucket/avro/%s", uuid.New()) qrepConfig.SetupWatermarkTableOnDestination = true - e2e.RunQrepFlowWorkflow(env, qrepConfig) - - // Verify workflow completes without error - require.True(s.t, env.IsWorkflowCompleted()) - - err = env.GetWorkflowError() - require.NoError(s.t, err) + env := e2e.RunQrepFlowWorkflow(tc, qrepConfig) + e2e.EnvWaitForFinished(s.t, env, 3*time.Minute) + require.NoError(s.t, env.Error()) sel := e2e.GetOwnersSelectorStringsSF() s.compareTableContentsWithDiffSelectorsSF(tblName, sel[0], sel[1]) } func (s PeerFlowE2ETestSuiteSF) Test_PeerDB_Columns_QRep_SF() { - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) numRows := 10 @@ -282,13 +263,9 @@ func (s PeerFlowE2ETestSuiteSF) Test_PeerDB_Columns_QRep_SF() { qrepConfig.SetupWatermarkTableOnDestination = true require.NoError(s.t, err) - e2e.RunQrepFlowWorkflow(env, qrepConfig) - - // Verify workflow completes without error - require.True(s.t, env.IsWorkflowCompleted()) - - err = env.GetWorkflowError() - require.NoError(s.t, err) + env := e2e.RunQrepFlowWorkflow(tc, qrepConfig) + e2e.EnvWaitForFinished(s.t, env, 3*time.Minute) + require.NoError(s.t, env.Error()) err = s.sfHelper.checkSyncedAt(fmt.Sprintf(`SELECT "_PEERDB_SYNCED_AT" FROM %s.%s`, s.sfHelper.testSchemaName, tblName)) diff --git a/flow/e2e/sqlserver/qrep_flow_sqlserver_test.go b/flow/e2e/sqlserver/qrep_flow_sqlserver_test.go index 2e7572cdb0..448d0f32b7 100644 --- a/flow/e2e/sqlserver/qrep_flow_sqlserver_test.go +++ b/flow/e2e/sqlserver/qrep_flow_sqlserver_test.go @@ -147,7 +147,7 @@ func (s PeerFlowE2ETestSuiteSQLServer) Test_Complete_QRep_Flow_SqlServer_Append( s.t.Skip("Skipping SQL Server test") } - env := e2e.NewTemporalTestWorkflowEnvironment(s.t) + tc := e2e.NewTemporalClient(s.t) numRows := 10 tblName := "test_qrep_flow_avro_ss_append" @@ -178,18 +178,14 @@ func (s PeerFlowE2ETestSuiteSQLServer) Test_Complete_QRep_Flow_SqlServer_Append( WaitBetweenBatchesSeconds: 5, } - e2e.RunQrepFlowWorkflow(env, qrepConfig) - - // Verify workflow completes without error - require.True(s.t, env.IsWorkflowCompleted()) - - err := env.GetWorkflowError() - require.NoError(s.t, err) + env := e2e.RunQrepFlowWorkflow(tc, qrepConfig) + e2e.EnvWaitForFinished(s.t, env, 3*time.Minute) + require.NoError(s.t, env.Error()) // Verify that the destination table has the same number of rows as the source table var numRowsInDest pgtype.Int8 countQuery := "SELECT COUNT(*) FROM " + dstTableName - err = s.Conn().QueryRow(context.Background(), countQuery).Scan(&numRowsInDest) + err := s.Conn().QueryRow(context.Background(), 
countQuery).Scan(&numRowsInDest) require.NoError(s.t, err) require.Equal(s.t, numRows, int(numRowsInDest.Int64)) diff --git a/flow/e2e/test_utils.go b/flow/e2e/test_utils.go index 6239f7d148..92d04d1f3d 100644 --- a/flow/e2e/test_utils.go +++ b/flow/e2e/test_utils.go @@ -6,7 +6,6 @@ import ( "errors" "fmt" "log/slog" - "runtime" "slices" "strings" "testing" @@ -16,19 +15,15 @@ import ( "github.com/jackc/pgerrcode" "github.com/jackc/pgx/v5" "github.com/jackc/pgx/v5/pgconn" - "github.com/jackc/pgx/v5/pgxpool" "github.com/stretchr/testify/require" + "go.temporal.io/api/enums/v1" + "go.temporal.io/sdk/client" + "go.temporal.io/sdk/converter" "go.temporal.io/sdk/temporal" - "go.temporal.io/sdk/testsuite" - "go.temporal.io/sdk/worker" - "github.com/PeerDB-io/peer-flow/activities" - "github.com/PeerDB-io/peer-flow/alerting" - "github.com/PeerDB-io/peer-flow/connectors" connpostgres "github.com/PeerDB-io/peer-flow/connectors/postgres" connsnowflake "github.com/PeerDB-io/peer-flow/connectors/snowflake" "github.com/PeerDB-io/peer-flow/connectors/utils" - catalog "github.com/PeerDB-io/peer-flow/connectors/utils/catalog" "github.com/PeerDB-io/peer-flow/e2eshared" "github.com/PeerDB-io/peer-flow/generated/protos" "github.com/PeerDB-io/peer-flow/logger" @@ -49,61 +44,25 @@ type RowSource interface { GetRows(table, cols string) (*model.QRecordBatch, error) } -func RegisterWorkflowsAndActivities(t *testing.T, env *testsuite.TestWorkflowEnvironment) { - t.Helper() - - conn, err := pgxpool.New(context.Background(), catalog.GetCatalogConnectionStringFromEnv()) - if err != nil { - t.Fatalf("unable to create catalog connection pool: %v", err) - } - - // set a 5 minute timeout for the workflow to execute a few runs. - env.SetTestTimeout(5 * time.Minute) - - peerflow.RegisterFlowWorkerWorkflows(env) - env.RegisterWorkflow(peerflow.SnapshotFlowWorkflow) - - alerter, err := alerting.NewAlerter(conn) - if err != nil { - t.Fatalf("unable to create alerter: %v", err) - } - - env.RegisterActivity(&activities.FlowableActivity{ - CatalogPool: conn, - Alerter: alerter, - CdcCache: make(map[string]connectors.CDCPullConnector), - }) - env.RegisterActivity(&activities.SnapshotActivity{ - SnapshotConnections: make(map[string]activities.SlotSnapshotSignal), - Alerter: alerter, - }) -} - -func EnvSignalWorkflow[T any](env *testsuite.TestWorkflowEnvironment, signal model.TypedSignal[T], value T) { - env.SignalWorkflow(signal.Name, value) -} - // Helper function to assert errors in go routines running concurrent to workflows // This achieves two goals: // 1. cancel workflow to avoid waiting on goroutine which has failed // 2. 
get around t.FailNow being incorrect when called from non initial goroutine -func EnvNoError(t *testing.T, env *testsuite.TestWorkflowEnvironment, err error) { +func EnvNoError(t *testing.T, env WorkflowRun, err error) { t.Helper() if err != nil { - t.Error("UNEXPECTED ERROR", err.Error()) - env.CancelWorkflow() - runtime.Goexit() + env.Cancel() + t.Fatal("UNEXPECTED ERROR", err.Error()) } } -func EnvTrue(t *testing.T, env *testsuite.TestWorkflowEnvironment, val bool) { +func EnvTrue(t *testing.T, env WorkflowRun, val bool) { t.Helper() if !val { - t.Error("UNEXPECTED FALSE") - env.CancelWorkflow() - runtime.Goexit() + env.Cancel() + t.Fatal("UNEXPECTED FALSE") } } @@ -130,7 +89,7 @@ func RequireEqualTables(suite RowSource, table string, cols string) { require.True(t, e2eshared.CheckEqualRecordBatches(t, pgRows, rows)) } -func EnvEqualTables(env *testsuite.TestWorkflowEnvironment, suite RowSource, table string, cols string) { +func EnvEqualTables(env WorkflowRun, suite RowSource, table string, cols string) { t := suite.T() t.Helper() @@ -144,7 +103,7 @@ func EnvEqualTables(env *testsuite.TestWorkflowEnvironment, suite RowSource, tab } func EnvWaitForEqualTables( - env *testsuite.TestWorkflowEnvironment, + env WorkflowRun, suite RowSource, reason string, table string, @@ -155,7 +114,7 @@ func EnvWaitForEqualTables( } func EnvWaitForEqualTablesWithNames( - env *testsuite.TestWorkflowEnvironment, + env WorkflowRun, suite RowSource, reason string, srcTable string, @@ -182,9 +141,10 @@ func EnvWaitForEqualTablesWithNames( }) } -func RequireEnvCanceled(t *testing.T, env *testsuite.TestWorkflowEnvironment) { +func RequireEnvCanceled(t *testing.T, env WorkflowRun) { t.Helper() - err := env.GetWorkflowError() + EnvWaitForFinished(t, env, time.Minute) + err := env.Error() var panicErr *temporal.PanicError var canceledErr *temporal.CanceledError if err == nil { @@ -196,31 +156,25 @@ func RequireEnvCanceled(t *testing.T, env *testsuite.TestWorkflowEnvironment) { } } -func SetupCDCFlowStatusQuery(t *testing.T, env *testsuite.TestWorkflowEnvironment, - connectionGen FlowConnectionGenerationConfig, -) { +func SetupCDCFlowStatusQuery(t *testing.T, env WorkflowRun, connectionGen FlowConnectionGenerationConfig) { t.Helper() // errors expected while PeerFlowStatusQuery is setup counter := 0 for { time.Sleep(time.Second) counter++ - response, err := env.QueryWorkflow( - shared.CDCFlowStateQuery, - connectionGen.FlowJobName, - ) + response, err := env.Query(shared.CDCFlowStateQuery, connectionGen.FlowJobName) if err == nil { var state peerflow.CDCFlowWorkflowState err = response.Get(&state) if err != nil { - t.Log(err.Error()) + t.Fatal(err) } else if state.CurrentFlowStatus == protos.FlowStatus_STATUS_RUNNING { return } } else if counter > 15 { - t.Error("UNEXPECTED SETUP CDC TIMEOUT", err.Error()) - env.CancelWorkflow() - runtime.Goexit() + env.Cancel() + t.Fatal("UNEXPECTED SETUP CDC TIMEOUT", err.Error()) } else if counter > 5 { // log the error for informational purposes t.Log(err.Error()) @@ -453,15 +407,15 @@ func CreateQRepWorkflowConfig( return qrepConfig, nil } -func RunQrepFlowWorkflow(env *testsuite.TestWorkflowEnvironment, config *protos.QRepConfig) { +func RunQrepFlowWorkflow(tc client.Client, config *protos.QRepConfig) WorkflowRun { state := peerflow.NewQRepFlowState() - env.ExecuteWorkflow(peerflow.QRepFlowWorkflow, config, state) + return ExecutePeerflow(tc, peerflow.QRepFlowWorkflow, config, state) } -func RunXminFlowWorkflow(env *testsuite.TestWorkflowEnvironment, config *protos.QRepConfig) 
{ +func RunXminFlowWorkflow(tc client.Client, config *protos.QRepConfig) WorkflowRun { state := peerflow.NewQRepFlowState() state.LastPartition.PartitionId = uuid.New().String() - env.ExecuteWorkflow(peerflow.XminFlowWorkflow, config, state) + return ExecutePeerflow(tc, peerflow.XminFlowWorkflow, config, state) } func GetOwnersSchema() *model.QRecordSchema { @@ -550,9 +504,8 @@ func (tw *testWriter) Write(p []byte) (int, error) { return len(p), nil } -func NewTemporalTestWorkflowEnvironment(t *testing.T) *testsuite.TestWorkflowEnvironment { +func NewTemporalClient(t *testing.T) client.Client { t.Helper() - testSuite := &testsuite.WorkflowTestSuite{} logger := slog.New(logger.NewHandler( slog.NewJSONHandler( @@ -560,36 +513,76 @@ func NewTemporalTestWorkflowEnvironment(t *testing.T) *testsuite.TestWorkflowEnv &slog.HandlerOptions{Level: slog.LevelWarn}, ), )) - testSuite.SetLogger(&TStructuredLogger{logger: logger}) - env := testSuite.NewTestWorkflowEnvironment() - env.SetWorkerOptions(worker.Options{EnableSessionWorker: true}) - RegisterWorkflowsAndActivities(t, env) - return env + tc, err := client.Dial(client.Options{ + HostPort: "localhost:7233", + Logger: logger, + }) + if err != nil { + t.Fatalf("Failed to connect temporal client: %v", err) + } + return tc } -type TStructuredLogger struct { - logger *slog.Logger +type WorkflowRun struct { + client.WorkflowRun + c client.Client } -func (l *TStructuredLogger) keyvalsToFields(keyvals []interface{}) slog.Attr { - return slog.Group("test-log", keyvals...) +func ExecutePeerflow(tc client.Client, wf interface{}, args ...interface{}) WorkflowRun { + return ExecuteWorkflow(tc, shared.PeerFlowTaskQueue, wf, args...) } -func (l *TStructuredLogger) Debug(msg string, keyvals ...interface{}) { - l.logger.With(l.keyvalsToFields(keyvals)).Debug(msg) +func ExecuteWorkflow(tc client.Client, taskQueueID shared.TaskQueueID, wf interface{}, args ...interface{}) WorkflowRun { + taskQueue := shared.GetPeerFlowTaskQueueName(taskQueueID) + + wr, err := tc.ExecuteWorkflow( + context.Background(), + client.StartWorkflowOptions{ + TaskQueue: taskQueue, + WorkflowExecutionTimeout: 5 * time.Minute, + }, + wf, + args..., + ) + if err != nil { + panic(err) + } + return WorkflowRun{ + WorkflowRun: wr, + c: tc, + } +} + +func (env WorkflowRun) Finished() bool { + desc, err := env.c.DescribeWorkflowExecution(context.Background(), env.GetID(), "") + if err != nil { + return false + } + return desc.GetWorkflowExecutionInfo().GetStatus() != enums.WORKFLOW_EXECUTION_STATUS_RUNNING +} + +func (env WorkflowRun) Error() error { + if env.Finished() { + return env.Get(context.Background(), nil) + } else { + return nil + } } -func (l *TStructuredLogger) Info(msg string, keyvals ...interface{}) { - l.logger.With(l.keyvalsToFields(keyvals)).Info(msg) +func (env WorkflowRun) Cancel() { + _ = env.c.CancelWorkflow(context.Background(), env.GetID(), "") } -func (l *TStructuredLogger) Warn(msg string, keyvals ...interface{}) { - l.logger.With(l.keyvalsToFields(keyvals)).Warn(msg) +func (env WorkflowRun) Query(queryType string, args ...interface{}) (converter.EncodedValue, error) { + return env.c.QueryWorkflow(context.Background(), env.GetID(), "", queryType, args...) 
} -func (l *TStructuredLogger) Error(msg string, keyvals ...interface{}) { - l.logger.With(l.keyvalsToFields(keyvals)).Error(msg) +func SignalWorkflow[T any](env WorkflowRun, signal model.TypedSignal[T], value T) { + err := env.c.SignalWorkflow(context.Background(), env.GetID(), "", signal.Name, value) + if err != nil { + panic(err) + } } func CompareTableSchemas(x *protos.TableSchema, y *protos.TableSchema) bool { @@ -624,22 +617,43 @@ func RequireEqualRecordBatches(t *testing.T, q *model.QRecordBatch, other *model require.True(t, e2eshared.CheckEqualRecordBatches(t, q, other)) } -func EnvEqualRecordBatches(t *testing.T, env *testsuite.TestWorkflowEnvironment, q *model.QRecordBatch, other *model.QRecordBatch) { +func EnvEqualRecordBatches(t *testing.T, env WorkflowRun, q *model.QRecordBatch, other *model.QRecordBatch) { t.Helper() EnvTrue(t, env, e2eshared.CheckEqualRecordBatches(t, q, other)) } -func EnvWaitFor(t *testing.T, env *testsuite.TestWorkflowEnvironment, timeout time.Duration, reason string, f func() bool) { +func EnvWaitFor(t *testing.T, env WorkflowRun, timeout time.Duration, reason string, f func() bool) { t.Helper() t.Log("WaitFor", reason, time.Now()) deadline := time.Now().Add(timeout) for !f() { if time.Now().After(deadline) { - t.Error("UNEXPECTED TIMEOUT", reason, time.Now()) - env.CancelWorkflow() - runtime.Goexit() + env.Cancel() + t.Fatal("UNEXPECTED TIMEOUT", reason, time.Now()) } time.Sleep(time.Second) } } + +func EnvWaitForFinished(t *testing.T, env WorkflowRun, timeout time.Duration) { + t.Helper() + + EnvWaitFor(t, env, timeout, "finish", func() bool { + t.Helper() + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + desc, err := env.c.DescribeWorkflowExecution(ctx, env.GetID(), "") + if err != nil { + t.Log("Not finished", err) + return false + } + status := desc.GetWorkflowExecutionInfo().GetStatus() + if status != enums.WORKFLOW_EXECUTION_STATUS_RUNNING { + t.Log("Finished Status", status) + return true + } + return false + }) +} diff --git a/flow/cmd/main.go b/flow/main.go similarity index 58% rename from flow/cmd/main.go rename to flow/main.go index f7fa4734fb..34c66f57a0 100644 --- a/flow/cmd/main.go +++ b/flow/main.go @@ -6,11 +6,14 @@ import ( "log/slog" "os" "os/signal" + "runtime" "syscall" "github.com/urfave/cli/v3" + "go.temporal.io/sdk/worker" _ "go.uber.org/automaxprocs" + "github.com/PeerDB-io/peer-flow/cmd" "github.com/PeerDB-io/peer-flow/logger" ) @@ -64,16 +67,21 @@ func main() { Commands: []*cli.Command{ { Name: "worker", - Action: func(ctx context.Context, cmd *cli.Command) error { - temporalHostPort := cmd.String("temporal-host-port") - return WorkerMain(&WorkerOptions{ + Action: func(ctx context.Context, clicmd *cli.Command) error { + temporalHostPort := clicmd.String("temporal-host-port") + c, w, err := cmd.WorkerMain(&cmd.WorkerOptions{ TemporalHostPort: temporalHostPort, - EnableProfiling: cmd.Bool("enable-profiling"), - PyroscopeServer: cmd.String("pyroscope-server-address"), - TemporalNamespace: cmd.String("temporal-namespace"), - TemporalCert: cmd.String("temporal-cert"), - TemporalKey: cmd.String("temporal-key"), + EnableProfiling: clicmd.Bool("enable-profiling"), + PyroscopeServer: clicmd.String("pyroscope-server-address"), + TemporalNamespace: clicmd.String("temporal-namespace"), + TemporalCert: clicmd.String("temporal-cert"), + TemporalKey: clicmd.String("temporal-key"), }) + if err != nil { + return err + } + defer c.Close() + return w.Run(worker.InterruptCh()) }, Flags: []cli.Flag{ 
temporalHostPortFlag, @@ -86,14 +94,19 @@ func main() { }, { Name: "snapshot-worker", - Action: func(ctx context.Context, cmd *cli.Command) error { - temporalHostPort := cmd.String("temporal-host-port") - return SnapshotWorkerMain(&SnapshotWorkerOptions{ + Action: func(ctx context.Context, clicmd *cli.Command) error { + temporalHostPort := clicmd.String("temporal-host-port") + c, w, err := cmd.SnapshotWorkerMain(&cmd.SnapshotWorkerOptions{ TemporalHostPort: temporalHostPort, - TemporalNamespace: cmd.String("temporal-namespace"), - TemporalCert: cmd.String("temporal-cert"), - TemporalKey: cmd.String("temporal-key"), + TemporalNamespace: clicmd.String("temporal-namespace"), + TemporalCert: clicmd.String("temporal-cert"), + TemporalKey: clicmd.String("temporal-key"), }) + if err != nil { + return err + } + defer c.Close() + return w.Run(worker.InterruptCh()) }, Flags: []cli.Flag{ temporalHostPortFlag, @@ -120,22 +133,33 @@ func main() { &temporalCertFlag, &temporalKeyFlag, }, - Action: func(ctx context.Context, cmd *cli.Command) error { - temporalHostPort := cmd.String("temporal-host-port") + Action: func(ctx context.Context, clicmd *cli.Command) error { + temporalHostPort := clicmd.String("temporal-host-port") - return APIMain(ctx, &APIServerParams{ - Port: uint16(cmd.Uint("port")), + return cmd.APIMain(ctx, &cmd.APIServerParams{ + Port: uint16(clicmd.Uint("port")), TemporalHostPort: temporalHostPort, - GatewayPort: uint16(cmd.Uint("gateway-port")), - TemporalNamespace: cmd.String("temporal-namespace"), - TemporalCert: cmd.String("temporal-cert"), - TemporalKey: cmd.String("temporal-key"), + GatewayPort: uint16(clicmd.Uint("gateway-port")), + TemporalNamespace: clicmd.String("temporal-namespace"), + TemporalCert: clicmd.String("temporal-cert"), + TemporalKey: clicmd.String("temporal-key"), }) }, }, }, } + go func() { + sigs := make(chan os.Signal, 1) + signal.Notify(sigs, syscall.SIGQUIT) + buf := make([]byte, 1<<20) + for { + <-sigs + stacklen := runtime.Stack(buf, true) + log.Printf("=== received SIGQUIT ===\n*** goroutine dump...\n%s\n*** end\n", buf[:stacklen]) + } + }() + if err := app.Run(appCtx, os.Args); err != nil { log.Printf("error running app: %+v", err) } diff --git a/flow/workflows/sync_flow.go b/flow/workflows/sync_flow.go index 0e4c786e2c..0dd5a1e4d9 100644 --- a/flow/workflows/sync_flow.go +++ b/flow/workflows/sync_flow.go @@ -204,7 +204,7 @@ func SyncFlowWorkflow( } } if err := ctx.Err(); err != nil { - logger.Info("sync canceled: %v", err) + logger.Info("sync canceled", slog.Any("error", err)) return err } else if stop { return nil diff --git a/stacks/flow.Dockerfile b/stacks/flow.Dockerfile index 68e7e7d5aa..f991421c40 100644 --- a/stacks/flow.Dockerfile +++ b/stacks/flow.Dockerfile @@ -13,10 +13,10 @@ RUN go mod download # Copy all the code COPY flow . -# build the binary from cmd folder -WORKDIR /root/flow/cmd +# build the binary from flow folder +WORKDIR /root/flow ENV CGO_ENABLED=1 -RUN go build -ldflags="-s -w" -o /root/peer-flow . 
+RUN go build -ldflags="-s -w" -o /root/peer-flow FROM debian:bookworm-slim AS flow-base RUN apt-get update && apt-get install -y ca-certificates libgeos-c1v5 From b794f192d105ac5b9200faab66be54877325021e Mon Sep 17 00:00:00 2001 From: Amogh Bharadwaj Date: Mon, 4 Mar 2024 21:10:25 +0530 Subject: [PATCH 10/13] UI: Edit Alerts (#1402) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds UI to edit an alert Screenshot 2024-02-29 at 12 49 47 AM ![Screenshot 2024-03-04 at 8 57 12 PM](https://github.com/PeerDB-io/peerdb/assets/65964360/a45254d9-7033-42cc-b116-e0c282a78350) --- ui/app/alert-config/new.tsx | 57 +++++++++++++++++------- ui/app/alert-config/page.tsx | 72 +++++++++++++++++++++++-------- ui/app/alert-config/validation.ts | 1 + ui/app/api/alert-config/route.ts | 18 ++++++++ ui/components/AlertDropdown.tsx | 62 ++++++++++++++++++++++++++ ui/components/DropDialog.tsx | 10 ++++- ui/package-lock.json | 70 ++++++++++++++++++++++++++++++ ui/package.json | 1 + 8 files changed, 256 insertions(+), 35 deletions(-) create mode 100644 ui/components/AlertDropdown.tsx diff --git a/ui/app/alert-config/new.tsx b/ui/app/alert-config/new.tsx index aad7789029..ffaa41f91b 100644 --- a/ui/app/alert-config/new.tsx +++ b/ui/app/alert-config/new.tsx @@ -7,6 +7,17 @@ import { PulseLoader } from 'react-spinners'; import { ToastContainer, toast } from 'react-toastify'; import 'react-toastify/dist/ReactToastify.css'; import { alertConfigReqSchema, alertConfigType } from './validation'; + +export interface AlertConfigProps { + id?: bigint; + serviceType: string; + authToken: string; + channelIdString: string; + slotLagGBAlertThreshold: number; + openConnectionsAlertThreshold: number; + forEdit?: boolean; +} + const notifyErr = (errMsg: string) => { toast.error(errMsg, { position: 'bottom-center', @@ -21,29 +32,30 @@ function ConfigLabel() { ); } -const NewAlertConfig = () => { - const [serviceType, setServiceType] = useState(); - const [authToken, setAuthToken] = useState(); - const [channelIdString, setChannelIdString] = useState(); - const [slotLagMBAlertThreshold, setSlotLagMBAlertThreshold] = - useState(); +const NewAlertConfig = (alertProps: AlertConfigProps) => { + const [serviceType, setServiceType] = useState('slack'); + const [authToken, setAuthToken] = useState(alertProps.authToken); + const [channelIdString, setChannelIdString] = useState( + alertProps.channelIdString + ); + const [slotLagGBAlertThreshold, setSlotLagGBAlertThreshold] = + useState(alertProps.slotLagGBAlertThreshold); const [openConnectionsAlertThreshold, setOpenConnectionsAlertThreshold] = - useState(); + useState(alertProps.openConnectionsAlertThreshold); const [loading, setLoading] = useState(false); const handleAdd = async () => { if (serviceType !== 'slack') { notifyErr('Service Type must be selected'); return; } - console.log(slotLagMBAlertThreshold); - console.log(openConnectionsAlertThreshold); + const alertConfigReq: alertConfigType = { serviceType: serviceType, serviceConfig: { auth_token: authToken ?? 
'', channel_ids: channelIdString?.split(',')!, - slot_lag_mb_alert_threshold: slotLagMBAlertThreshold || 0, - open_connections_alert_threshold: openConnectionsAlertThreshold || 0, + slot_lag_mb_alert_threshold: slotLagGBAlertThreshold * 1000 || 20000, + open_connections_alert_threshold: openConnectionsAlertThreshold || 5, }, }; const alertReqValidity = alertConfigReqSchema.safeParse(alertConfigReq); @@ -52,8 +64,11 @@ const NewAlertConfig = () => { return; } setLoading(true); + if (alertProps.forEdit) { + alertConfigReq.id = Number(alertProps.id); + } const createRes = await fetch('/api/alert-config', { - method: 'POST', + method: alertProps.forEdit ? 'PUT' : 'POST', body: JSON.stringify(alertConfigReq), }); const createStatus = await createRes.text(); @@ -86,6 +101,10 @@ const NewAlertConfig = () => { }, ]} placeholder='Select provider' + defaultValue={{ + value: 'slack', + label: 'Slack', + }} formatOptionLabel={ConfigLabel} onChange={(val, _) => val && setServiceType(val.value)} /> @@ -113,14 +132,14 @@ const NewAlertConfig = () => {
[JSX tags in this hunk were lost in extraction; the recoverable change is:]
-          Slot Lag Alert Threshold (in MB)
+          Slot Lag Alert Threshold (in GB)
-            onChange={(e) => setSlotLagMBAlertThreshold(e.target.valueAsNumber)}
+            value={slotLagGBAlertThreshold}
+            onChange={(e) => setSlotLagGBAlertThreshold(e.target.valueAsNumber)}
           />
@@ -143,7 +162,13 @@ const NewAlertConfig = () => { onClick={handleAdd} variant='normalSolid' > - {loading ? : 'Create'} + {loading ? ( + + ) : alertProps.forEdit ? ( + 'Update' + ) : ( + 'Create' + )} diff --git a/ui/app/alert-config/page.tsx b/ui/app/alert-config/page.tsx index 4932255487..7d76872827 100644 --- a/ui/app/alert-config/page.tsx +++ b/ui/app/alert-config/page.tsx @@ -1,6 +1,6 @@ 'use client'; +import AlertDropdown from '@/components/AlertDropdown'; import ConfigJSONView from '@/components/ConfigJSONView'; -import { DropDialog } from '@/components/DropDialog'; import { Button } from '@/lib/Button'; import { Icon } from '@/lib/Icon'; import { Label } from '@/lib/Label'; @@ -11,8 +11,7 @@ import { PulseLoader } from 'react-spinners'; import useSWR from 'swr'; import { UAlertConfigResponse } from '../dto/AlertDTO'; import { fetcher } from '../utils/swr'; -import NewAlertConfig from './new'; - +import NewAlertConfig, { AlertConfigProps } from './new'; const ServiceIcon = (serviceType: string) => { switch (serviceType.toLowerCase()) { default: @@ -22,15 +21,40 @@ const ServiceIcon = (serviceType: string) => { const AlertConfigPage: React.FC = () => { const { data: alerts, - error, isLoading, }: { data: UAlertConfigResponse[]; error: any; isLoading: boolean; } = useSWR('/api/alert-config', fetcher); - const [newConfig, setNewConfig] = useState(false); + const blankAlert: AlertConfigProps = { + serviceType: '', + authToken: '', + channelIdString: '', + slotLagGBAlertThreshold: 20, + openConnectionsAlertThreshold: 5, + forEdit: false, + }; + const [inEditOrAddMode, setInEditOrAddMode] = useState(false); + const [editAlertConfig, setEditAlertConfig] = + useState(blankAlert); + const onEdit = (alertConfig: UAlertConfigResponse, id: bigint) => { + setInEditOrAddMode(true); + const configJSON = JSON.stringify(alertConfig.service_config); + const channelIds: string[] = JSON.parse(configJSON)?.channel_ids; + setEditAlertConfig({ + id, + serviceType: alertConfig.service_type, + authToken: JSON.parse(configJSON)?.auth_token, + channelIdString: channelIds.join(','), + slotLagGBAlertThreshold: + (JSON.parse(configJSON)?.slot_lag_mb_alert_threshold as number) / 1000, + openConnectionsAlertThreshold: + JSON.parse(configJSON)?.open_connections_alert_threshold, + forEdit: true, + }); + }; return (
@@ -54,16 +78,22 @@ const AlertConfigPage: React.FC = () => { > {alerts?.length ? ( - alerts.map((alert: UAlertConfigResponse, index) => ( + alerts.map((alertConfig: UAlertConfigResponse, index) => ( - {alert.id} - {ServiceIcon(alert.service_type)} + {alertConfig.id} + + + {ServiceIcon(alertConfig.service_type)} -
+
@@ -75,7 +105,11 @@ const AlertConfigPage: React.FC = () => { justifyContent: 'center', }} > - + onEdit(alertConfig, alertConfig.id)} + />
@@ -94,17 +128,21 @@ const AlertConfigPage: React.FC = () => { )} - {newConfig && } + {inEditOrAddMode && } ); }; diff --git a/ui/app/alert-config/validation.ts b/ui/app/alert-config/validation.ts index 0d2007b501..3256c51aa0 100644 --- a/ui/app/alert-config/validation.ts +++ b/ui/app/alert-config/validation.ts @@ -1,6 +1,7 @@ import z from 'zod'; export const alertConfigReqSchema = z.object({ + id: z.optional(z.number()), serviceType: z.enum(['slack'], { errorMap: (issue, ctx) => ({ message: 'Invalid service type' }), }), diff --git a/ui/app/api/alert-config/route.ts b/ui/app/api/alert-config/route.ts index 6966d2c146..7f66da191f 100644 --- a/ui/app/api/alert-config/route.ts +++ b/ui/app/api/alert-config/route.ts @@ -40,3 +40,21 @@ export async function DELETE(request: Request) { return new Response(deleteStatus); } + +export async function PUT(request: Request) { + const alertConfigReq: alertConfigType = await request.json(); + const editRes = await prisma.alerting_config.update({ + data: { + service_type: alertConfigReq.serviceType, + service_config: alertConfigReq.serviceConfig, + }, + where: { + id: alertConfigReq.id, + }, + }); + let editStatus: 'success' | 'error' = 'error'; + if (editRes.id) { + editStatus = 'success'; + } + return new Response(editStatus); +} diff --git a/ui/components/AlertDropdown.tsx b/ui/components/AlertDropdown.tsx new file mode 100644 index 0000000000..dd3ae482e3 --- /dev/null +++ b/ui/components/AlertDropdown.tsx @@ -0,0 +1,62 @@ +import { Button } from '@/lib/Button/Button'; +import { Icon } from '@/lib/Icon'; +import * as DropdownMenu from '@radix-ui/react-dropdown-menu'; +import { useState } from 'react'; +import { DropDialog } from './DropDialog'; +const AlertDropdown = ({ + disable, + alertId, + onEdit, +}: { + disable: boolean; + alertId: bigint; + onEdit: () => void; +}) => { + const [open, setOpen] = useState(false); + const handleToggle = () => { + setOpen((prevOpen) => !prevOpen); + }; + + const handleClose = () => { + setOpen(false); + }; + + return ( + + + + + + + + + + + + + + + + + ); +}; + +export default AlertDropdown; diff --git a/ui/components/DropDialog.tsx b/ui/components/DropDialog.tsx index 3da1d8ffdd..63acb3ef2b 100644 --- a/ui/components/DropDialog.tsx +++ b/ui/components/DropDialog.tsx @@ -113,8 +113,14 @@ export const DropDialog = ({ noInteract={true} size='large' triggerButton={ - } > diff --git a/ui/package-lock.json b/ui/package-lock.json index 92d72704e5..c2a9f74420 100644 --- a/ui/package-lock.json +++ b/ui/package-lock.json @@ -14,6 +14,7 @@ "@radix-ui/react-checkbox": "^1.0.4", "@radix-ui/react-collapsible": "^1.0.3", "@radix-ui/react-dialog": "^1.0.5", + "@radix-ui/react-dropdown-menu": "^2.0.6", "@radix-ui/react-form": "^0.0.3", "@radix-ui/react-icons": "^1.3.0", "@radix-ui/react-popover": "^1.0.7", @@ -3990,6 +3991,35 @@ } } }, + "node_modules/@radix-ui/react-dropdown-menu": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/@radix-ui/react-dropdown-menu/-/react-dropdown-menu-2.0.6.tgz", + "integrity": "sha512-i6TuFOoWmLWq+M/eCLGd/bQ2HfAX1RJgvrBQ6AQLmzfvsLdefxbWu8G9zczcPFfcSPehz9GcpF6K9QYreFV8hA==", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/primitive": "1.0.1", + "@radix-ui/react-compose-refs": "1.0.1", + "@radix-ui/react-context": "1.0.1", + "@radix-ui/react-id": "1.0.1", + "@radix-ui/react-menu": "2.0.6", + "@radix-ui/react-primitive": "1.0.3", + "@radix-ui/react-use-controllable-state": "1.0.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + 
"react": "^16.8 || ^17.0 || ^18.0", + "react-dom": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, "node_modules/@radix-ui/react-focus-guards": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-guards/-/react-focus-guards-1.0.1.tgz", @@ -4109,6 +4139,46 @@ } } }, + "node_modules/@radix-ui/react-menu": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/@radix-ui/react-menu/-/react-menu-2.0.6.tgz", + "integrity": "sha512-BVkFLS+bUC8HcImkRKPSiVumA1VPOOEC5WBMiT+QAVsPzW1FJzI9KnqgGxVDPBcql5xXrHkD3JOVoXWEXD8SYA==", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/primitive": "1.0.1", + "@radix-ui/react-collection": "1.0.3", + "@radix-ui/react-compose-refs": "1.0.1", + "@radix-ui/react-context": "1.0.1", + "@radix-ui/react-direction": "1.0.1", + "@radix-ui/react-dismissable-layer": "1.0.5", + "@radix-ui/react-focus-guards": "1.0.1", + "@radix-ui/react-focus-scope": "1.0.4", + "@radix-ui/react-id": "1.0.1", + "@radix-ui/react-popper": "1.1.3", + "@radix-ui/react-portal": "1.0.4", + "@radix-ui/react-presence": "1.0.1", + "@radix-ui/react-primitive": "1.0.3", + "@radix-ui/react-roving-focus": "1.0.4", + "@radix-ui/react-slot": "1.0.2", + "@radix-ui/react-use-callback-ref": "1.0.1", + "aria-hidden": "^1.1.1", + "react-remove-scroll": "2.5.5" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0", + "react-dom": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, "node_modules/@radix-ui/react-popover": { "version": "1.0.7", "resolved": "https://registry.npmjs.org/@radix-ui/react-popover/-/react-popover-1.0.7.tgz", diff --git a/ui/package.json b/ui/package.json index c9987de1e7..41575fcd7e 100644 --- a/ui/package.json +++ b/ui/package.json @@ -20,6 +20,7 @@ "@radix-ui/react-checkbox": "^1.0.4", "@radix-ui/react-collapsible": "^1.0.3", "@radix-ui/react-dialog": "^1.0.5", + "@radix-ui/react-dropdown-menu": "^2.0.6", "@radix-ui/react-form": "^0.0.3", "@radix-ui/react-icons": "^1.3.0", "@radix-ui/react-popover": "^1.0.7", From 15c9a0e1b79962f5f65c0d4a6a04839c87d74f3a Mon Sep 17 00:00:00 2001 From: Amogh Bharadwaj Date: Mon, 4 Mar 2024 22:59:57 +0530 Subject: [PATCH 11/13] UI: Collapsible sidebar and fixes (#1418) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Collapsible sidebar - Remove extra borders from tables - Change dropdown colors https://github.com/PeerDB-io/peerdb/assets/65964360/e5c1c47f-c4b4-431f-ab60-a9881330dd5a ![Screenshot 2024-02-29 at 10 42 05 PM](https://github.com/PeerDB-io/peerdb/assets/65964360/e07d2e6c-0060-4814-9dd7-3cf0827b5597) --- ui/app/alert-config/new.tsx | 2 + ui/app/alert-config/page.tsx | 5 +- ui/app/mirrors/[mirrorId]/cdc.tsx | 2 + ui/app/mirrors/[mirrorId]/cdcGraph.tsx | 2 + ui/app/mirrors/[mirrorId]/syncStatusTable.tsx | 2 + ui/app/mirrors/[mirrorId]/tablePairs.tsx | 80 ++--- ui/app/mirrors/create/mirrorcards.tsx | 12 +- ui/app/mirrors/create/page.tsx | 329 +++++++++--------- ui/app/mirrors/create/qrep/qrep.tsx | 3 + ui/app/mirrors/create/qrep/upsertcols.tsx | 2 + .../status/qrep/[mirrorId]/qrepGraph.tsx | 2 + .../qrep/[mirrorId]/qrepStatusTable.tsx | 3 - ui/app/mirrors/tables.tsx | 42 +-- ui/app/page.tsx | 2 +- ui/app/peers/[peerName]/lagGraph.tsx | 3 + ui/app/peers/[peerName]/page.tsx | 9 
+- ui/app/peers/[peerName]/style.ts | 7 +- ui/app/peers/create/[peerType]/page.tsx | 20 +- ui/app/peers/create/page.tsx | 35 +- ui/app/peers/page.tsx | 4 +- ui/app/peers/peersTable.tsx | 2 + ui/app/styles/select.tsx | 12 + ui/components/PeerInfo.tsx | 6 +- ui/components/SelectSource.tsx | 2 + ui/components/SidebarComponent.tsx | 81 +++-- ui/lib/Layout/Layout.styles.ts | 16 +- ui/lib/Layout/Layout.tsx | 3 +- 27 files changed, 353 insertions(+), 335 deletions(-) create mode 100644 ui/app/styles/select.tsx diff --git a/ui/app/alert-config/new.tsx b/ui/app/alert-config/new.tsx index ffaa41f91b..6a399e4c81 100644 --- a/ui/app/alert-config/new.tsx +++ b/ui/app/alert-config/new.tsx @@ -6,6 +6,7 @@ import ReactSelect from 'react-select'; import { PulseLoader } from 'react-spinners'; import { ToastContainer, toast } from 'react-toastify'; import 'react-toastify/dist/ReactToastify.css'; +import SelectTheme from '../styles/select'; import { alertConfigReqSchema, alertConfigType } from './validation'; export interface AlertConfigProps { @@ -107,6 +108,7 @@ const NewAlertConfig = (alertProps: AlertConfigProps) => { }} formatOptionLabel={ConfigLabel} onChange={(val, _) => val && setServiceType(val.value)} + theme={SelectTheme} />
diff --git a/ui/app/alert-config/page.tsx b/ui/app/alert-config/page.tsx index 7d76872827..514f586155 100644 --- a/ui/app/alert-config/page.tsx +++ b/ui/app/alert-config/page.tsx @@ -10,6 +10,7 @@ import React, { useState } from 'react'; import { PulseLoader } from 'react-spinners'; import useSWR from 'swr'; import { UAlertConfigResponse } from '../dto/AlertDTO'; +import { tableStyle } from '../peers/[peerName]/style'; import { fetcher } from '../utils/swr'; import NewAlertConfig, { AlertConfigProps } from './new'; const ServiceIcon = (serviceType: string) => { @@ -73,9 +74,7 @@ const AlertConfigPage: React.FC = () => { alerts.
-        [wrapper markup lost in extraction]
+        [wrapper markup lost in extraction; the added import suggests it now uses tableStyle]
{alerts?.length ? ( alerts.map((alertConfig: UAlertConfigResponse, index) => ( diff --git a/ui/app/mirrors/[mirrorId]/cdc.tsx b/ui/app/mirrors/[mirrorId]/cdc.tsx index 3ca1737251..61ebc03434 100644 --- a/ui/app/mirrors/[mirrorId]/cdc.tsx +++ b/ui/app/mirrors/[mirrorId]/cdc.tsx @@ -1,5 +1,6 @@ 'use client'; import { SyncStatusRow } from '@/app/dto/MirrorsDTO'; +import SelectTheme from '@/app/styles/select'; import TimeLabel from '@/components/TimeComponent'; import { CloneTableSummary, @@ -153,6 +154,7 @@ export const SnapshotStatusTable = ({ status }: SnapshotStatusProps) => { value: 'cloneStartTime', label: 'Start Time', }} + theme={SelectTheme} />
+        [Table markup lost in extraction. Recoverable structure: the new header
+         renders "Source Table" and "Destination Table" cells, and the body maps
+         shownTables to rows of sourceTableIdentifier / destinationTableIdentifier.]
-        [The removed markup rendered the same header and rows with the previous
-         table elements; its tags were likewise lost in extraction.]
); diff --git a/ui/app/mirrors/create/mirrorcards.tsx b/ui/app/mirrors/create/mirrorcards.tsx index 207f4be3b4..6cf2dcd348 100644 --- a/ui/app/mirrors/create/mirrorcards.tsx +++ b/ui/app/mirrors/create/mirrorcards.tsx @@ -46,7 +46,6 @@ const MirrorCards = ({ style={{ padding: '0.5rem', width: '35%', - minHeight: '22vh', marginRight: card.title === 'Query Replication' ? '0.5rem' : 'auto', marginLeft: @@ -71,14 +70,17 @@ const MirrorCards = ({
{card.description}
- + ); })} diff --git a/ui/app/mirrors/create/page.tsx b/ui/app/mirrors/create/page.tsx index 76052329d7..4d79fea628 100644 --- a/ui/app/mirrors/create/page.tsx +++ b/ui/app/mirrors/create/page.tsx @@ -1,4 +1,5 @@ 'use client'; +import SelectTheme from '@/app/styles/select'; import { DBTypeToImageMapping } from '@/components/PeerComponent'; import { RequiredIndicator } from '@/components/RequiredIndicator'; import { QRepConfig } from '@/grpc_generated/flow'; @@ -105,183 +106,195 @@ export default function CreateMirrors() { }; return ( -
[JSX tags in this hunk were lost in extraction. Recoverable change: the whole
return body is re-indented inside a new wrapper element. Within it, the
mirror-name field, the src/dst peer selectors (source options still filtered to
DBType.POSTGRES, with the
https://docs.peerdb.io/usecases/Real-time%20CDC/postgres-to-snowflake#prerequisites
guide linked beside them), the mirror-type cards, and the conditional CDC/QRep
config sections carry over unchanged, while the peer ReactSelect components
gain theme={SelectTheme}.]
); } diff --git a/ui/app/mirrors/create/qrep/qrep.tsx b/ui/app/mirrors/create/qrep/qrep.tsx index 28a7adc282..ecd4a63630 100644 --- a/ui/app/mirrors/create/qrep/qrep.tsx +++ b/ui/app/mirrors/create/qrep/qrep.tsx @@ -1,4 +1,5 @@ 'use client'; +import SelectTheme from '@/app/styles/select'; import { RequiredIndicator } from '@/components/RequiredIndicator'; import { QRepConfig, QRepWriteType } from '@/grpc_generated/flow'; import { DBType } from '@/grpc_generated/peers'; @@ -202,6 +203,7 @@ export default function QRepConfigForm({ val && handleChange(val.value, setting) } options={WriteModes} + theme={SelectTheme} /> ) : setting.label === 'Upsert Key Columns' ? ( )}
diff --git a/ui/app/mirrors/create/qrep/upsertcols.tsx b/ui/app/mirrors/create/qrep/upsertcols.tsx index cd1009f48f..e49e004886 100644 --- a/ui/app/mirrors/create/qrep/upsertcols.tsx +++ b/ui/app/mirrors/create/qrep/upsertcols.tsx @@ -1,4 +1,5 @@ 'use client'; +import SelectTheme from '@/app/styles/select'; import { QRepConfig, QRepWriteMode, @@ -69,6 +70,7 @@ const UpsertColsDisplay = ({ }} isLoading={loading} options={columns} + theme={SelectTheme} />
val && setAggregateType(val.value)} + theme={SelectTheme} />
diff --git a/ui/app/mirrors/status/qrep/[mirrorId]/qrepStatusTable.tsx b/ui/app/mirrors/status/qrep/[mirrorId]/qrepStatusTable.tsx index 667ddcbff4..d0a01a1468 100644 --- a/ui/app/mirrors/status/qrep/[mirrorId]/qrepStatusTable.tsx +++ b/ui/app/mirrors/status/qrep/[mirrorId]/qrepStatusTable.tsx @@ -162,9 +162,6 @@ export default function QRepStatusTable({ > -
(''); @@ -23,26 +23,8 @@ export function CDCFlows({ cdcFlows }: { cdcFlows: any }) { return ( <> -
+
, - right: ( - ) => - setSearchQuery(e.target.value) - } - /> - ), - }} header={ {['Name', 'Source', 'Destination', 'Start Time', 'Logs', ''].map( @@ -131,26 +113,8 @@ export function QRepFlows({ return ( <> -
+
, - right: ( - ) => - setSearchQuery(e.target.value) - } - /> - ), - }} header={ {['Name', 'Source', 'Destination', 'Start Time', ''].map( diff --git a/ui/app/page.tsx b/ui/app/page.tsx index 4e84221bb7..83a0f1ccd6 100644 --- a/ui/app/page.tsx +++ b/ui/app/page.tsx @@ -5,7 +5,7 @@ import { Layout, LayoutMain } from '@/lib/Layout'; export default function Home() { return ( }> - +
PeerDB Home Page
diff --git a/ui/app/peers/[peerName]/lagGraph.tsx b/ui/app/peers/[peerName]/lagGraph.tsx index 31dbef3062..82f0ed9aae 100644 --- a/ui/app/peers/[peerName]/lagGraph.tsx +++ b/ui/app/peers/[peerName]/lagGraph.tsx @@ -1,5 +1,6 @@ 'use client'; import { SlotLagPoint } from '@/app/dto/PeersDTO'; +import SelectTheme from '@/app/styles/select'; import { formatGraphLabel, timeOptions } from '@/app/utils/graph'; import { Label } from '@/lib/Label'; import { ProgressCircle } from '@/lib/ProgressCircle/ProgressCircle'; @@ -92,6 +93,7 @@ function LagGraph({ slotNames }: { slotNames: string[] }) { ? { value: selectedSlot, label: selectedSlot } : undefined } + theme={SelectTheme} /> val && setTimeSince(val.value)} + theme={SelectTheme} /> { const stats = await getStatData(); return ( -
+
- - {configComponentMap(dbType)} - - + @@ -50,7 +63,7 @@ export default function CreatePeer() { - - +
+
); } diff --git a/ui/app/peers/page.tsx b/ui/app/peers/page.tsx index 34eea42f91..d7812e54c0 100644 --- a/ui/app/peers/page.tsx +++ b/ui/app/peers/page.tsx @@ -18,8 +18,8 @@ export default function Peers() { const { data: peers, error, isLoading } = useSWR('/api/peers', fetcher); return ( - - + +
({ + ...theme, + colors: { + ...theme.colors, + primary25: 'rgba(48, 164, 108, 0.3)', + primary: 'rgba(48, 164, 108, 0.3)', + }, +}); + +export default SelectTheme; diff --git a/ui/components/PeerInfo.tsx b/ui/components/PeerInfo.tsx index bdff740032..9c6cf79eb0 100644 --- a/ui/components/PeerInfo.tsx +++ b/ui/components/PeerInfo.tsx @@ -26,10 +26,8 @@ export const PeerInfo = ({ peerName }: { peerName: string }) => { size='auto' style={{ width: '40rem' }} triggerButton={ - } > diff --git a/ui/components/SelectSource.tsx b/ui/components/SelectSource.tsx index 04cc5bf3a3..4aefe67082 100644 --- a/ui/components/SelectSource.tsx +++ b/ui/components/SelectSource.tsx @@ -1,4 +1,5 @@ 'use client'; +import SelectTheme from '@/app/styles/select'; import { DBType } from '@/grpc_generated/peers'; import Image from 'next/image'; import { Dispatch, SetStateAction } from 'react'; @@ -44,6 +45,7 @@ export default function SelectSource({ defaultValue={dbTypes.find((opt) => opt.value === peerType)} onChange={(val, _) => val && setPeerType(val.value)} formatOptionLabel={SourceLabel} + theme={SelectTheme} /> ); } diff --git a/ui/components/SidebarComponent.tsx b/ui/components/SidebarComponent.tsx index 0ecf06598b..750648ef51 100644 --- a/ui/components/SidebarComponent.tsx +++ b/ui/components/SidebarComponent.tsx @@ -4,6 +4,7 @@ import { UVersionResponse } from '@/app/dto/VersionDTO'; import { fetcher } from '@/app/utils/swr'; import Logout from '@/components/Logout'; import { BrandLogo } from '@/lib/BrandLogo'; +import { Button } from '@/lib/Button'; import { Icon } from '@/lib/Icon'; import { Label } from '@/lib/Label'; import { RowWithSelect } from '@/lib/Layout'; @@ -21,33 +22,61 @@ const centerFlexStyle = { marginBottom: '0.5rem', }; -export default function SidebarComponent(props: {}) { +export default function SidebarComponent() { const timezones = ['UTC', 'Local', 'Relative']; const [zone, setZone] = useLocalStorage('timezone-ui', ''); const { data: version, - error, isLoading, }: { data: UVersionResponse; error: any; isLoading: boolean } = useSWR( '/api/version', fetcher ); + const [sidebarState, setSidebarState] = useLocalStorage( + 'peerdb-sidebar', + 'open' + ); return ( -
[Sidebar header markup lost in extraction. Surviving fragments show the header
now renders a collapse/expand control using the newly imported Button, and
wraps part of its content in a sidebarState === 'open' condition.]
} bottomRow={ - <> -
-
+ sidebarState === 'open' ? ( + <> +
Timezone:} action={ @@ -73,18 +102,24 @@ export default function SidebarComponent(props: {}) { } />
-
- - + + + ) : ( + <> + ) } bottomLabel={ -
- -
+ sidebarState === 'open' ? ( +
+ +
+ ) : ( + <> + ) } > } > - Peers + {sidebarState === 'open' && 'Peers'} } > - Mirrors + {sidebarState === 'open' && 'Mirrors'} } > - Alert Configuration + {sidebarState === 'open' && 'Alert Configuration'} diff --git a/ui/lib/Layout/Layout.styles.ts b/ui/lib/Layout/Layout.styles.ts index d9ac6fadd0..61fc360aa6 100644 --- a/ui/lib/Layout/Layout.styles.ts +++ b/ui/lib/Layout/Layout.styles.ts @@ -1,25 +1,17 @@ import styled from 'styled-components'; export const LayoutWrapper = styled.div` - display: grid; - grid-template-columns: 250px auto; + display: flex; background-color: ${({ theme }) => theme.colors.base.background.normal}; min-height: 100vh; + width: 100vw; `; -export type ContentWrapperProps = { - $fullWidth?: boolean; -}; - -export const ContentWrapper = styled.div` - grid-column: ${({ $fullWidth = false }) => - $fullWidth ? '1 / -1' : '2 / -1'}; +export const ContentWrapper = styled.div` background-color: ${({ theme }) => theme.colors.base.background.normal}; padding: ${({ theme }) => theme.spacing.medium}; - overflow-y: auto; - - display: grid; height: 100vh; + width: 100%; overflow-y: auto; `; diff --git a/ui/lib/Layout/Layout.tsx b/ui/lib/Layout/Layout.tsx index 97222c0f65..28b41fed65 100644 --- a/ui/lib/Layout/Layout.tsx +++ b/ui/lib/Layout/Layout.tsx @@ -81,12 +81,11 @@ type LayoutProps = PropsWithChildren<{ }>; export function Layout({ sidebar, children }: LayoutProps) { const Sidebar = isDefined(sidebar) && sidebar; - const fullWidth = !isDefined(sidebar); return ( {Sidebar} - {children} + {children} ); } From 39c6e7e380ceb61ba257fc3b43aa7300d2de1c51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Mon, 4 Mar 2024 17:45:11 +0000 Subject: [PATCH 12/13] Snowflake normalize: check gCtx.Err() while iterating through tables (#1427) errgroup.Go unconditionally executes the function / waits, causing pauses to be ignored for awhile --- flow/connectors/snowflake/snowflake.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/flow/connectors/snowflake/snowflake.go b/flow/connectors/snowflake/snowflake.go index 9382f59c54..4c7c378dff 100644 --- a/flow/connectors/snowflake/snowflake.go +++ b/flow/connectors/snowflake/snowflake.go @@ -516,6 +516,10 @@ func (c *SnowflakeConnector) NormalizeRecords(ctx context.Context, req *model.No g.SetLimit(8) // limit parallel merges to 8 for _, tableName := range destinationTableNames { + if err := gCtx.Err(); err != nil { + return nil, fmt.Errorf("canceled while normalizing records: %w", err) + } + g.Go(func() error { mergeGen := &mergeStmtGenerator{ rawTableName: getRawTableIdentifier(req.FlowJobName), @@ -547,10 +551,6 @@ func (c *SnowflakeConnector) NormalizeRecords(ctx context.Context, req *model.No endTime := time.Now() c.logger.Info(fmt.Sprintf("[merge] merged records into %s, took: %d seconds", tableName, endTime.Sub(startTime)/time.Second)) - if err != nil { - c.logger.Error("[merge] error while normalizing records", "error", err) - return err - } rowsAffected, err := result.RowsAffected() if err != nil { From 67fd5d3d5dda941e8e138217ea5152ff3bd3009d Mon Sep 17 00:00:00 2001 From: Kunal Gupta <39487888+iamKunalGupta@users.noreply.github.com> Date: Tue, 5 Mar 2024 00:24:31 +0530 Subject: [PATCH 13/13] feat: add telemetry/alerts via sns (#1411) --- flow/activities/snapshot_activity.go | 3 + flow/alerting/alerting.go | 43 +++++++- flow/cmd/snapshot_worker.go | 2 +- flow/cmd/worker.go | 2 +- flow/go.mod | 17 ++- flow/go.sum | 18 ++-- flow/peerdbenv/config.go | 5 + flow/shared/telemetry/interface.go | 25 
+++++ flow/shared/telemetry/sns_message_sender.go | 110 ++++++++++++++++++++ 9 files changed, 206 insertions(+), 19 deletions(-) create mode 100644 flow/shared/telemetry/interface.go create mode 100644 flow/shared/telemetry/sns_message_sender.go diff --git a/flow/activities/snapshot_activity.go b/flow/activities/snapshot_activity.go index 6ee2bb5a8b..262d3d0dbd 100644 --- a/flow/activities/snapshot_activity.go +++ b/flow/activities/snapshot_activity.go @@ -32,6 +32,7 @@ func (a *SnapshotActivity) CloseSlotKeepAlive(ctx context.Context, flowJobName s connectors.CloseConnector(ctx, s.connector) delete(a.SnapshotConnections, flowJobName) } + a.Alerter.LogFlowEvent(ctx, flowJobName, "Ended Snapshot Flow Job - "+flowJobName) return nil } @@ -49,6 +50,8 @@ func (a *SnapshotActivity) SetupReplication( return nil, nil } + a.Alerter.LogFlowEvent(ctx, config.FlowJobName, "Started Snapshot Flow Job - "+config.FlowJobName) + conn, err := connectors.GetCDCPullConnector(ctx, config.PeerConnectionConfig) if err != nil { return nil, fmt.Errorf("failed to get connector: %w", err) diff --git a/flow/alerting/alerting.go b/flow/alerting/alerting.go index a837f58889..6fc903aa79 100644 --- a/flow/alerting/alerting.go +++ b/flow/alerting/alerting.go @@ -14,11 +14,13 @@ import ( "github.com/PeerDB-io/peer-flow/generated/protos" "github.com/PeerDB-io/peer-flow/logger" "github.com/PeerDB-io/peer-flow/peerdbenv" + "github.com/PeerDB-io/peer-flow/shared/telemetry" ) // alerting service, no cool name :( type Alerter struct { - catalogPool *pgxpool.Pool + catalogPool *pgxpool.Pool + telemetrySender telemetry.Sender } func (a *Alerter) registerSendersFromPool(ctx context.Context) ([]*slackAlertSender, error) { @@ -50,13 +52,25 @@ func (a *Alerter) registerSendersFromPool(ctx context.Context) ([]*slackAlertSen } // doesn't take care of closing pool, needs to be done externally. 
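The new telemetry package itself (interface.go and sns_message_sender.go in the diffstat above) is not part of this excerpt. From its call sites in alerting.go, the surface it exposes looks roughly like the sketch below; the concrete field types, the Level representation, and the first return value of SendMessage are assumptions, not the actual flow/shared/telemetry definitions:

package telemetry

import "context"

// Level tags a message's severity; alerting.go uses INFO and ERROR.
type Level string

const (
	INFO  Level = "INFO"
	ERROR Level = "ERROR"
)

// Attributes mirrors what Alerter.sendTelemetryMessage passes along:
// severity, the deployment UID, and flow-name tags.
type Attributes struct {
	Level         Level
	DeploymentUID string
	Tags          []string
	Type          string
}

// Sender is the pluggable destination (SNS in this patch). Alerter keeps
// it nilable so telemetry stays optional when no topic ARN is configured.
// The first return value is assumed to be a message ID; every call site
// in this patch discards it.
type Sender interface {
	SendMessage(ctx context.Context, subject string, body string, attributes Attributes) (*string, error)
}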
-func NewAlerter(catalogPool *pgxpool.Pool) *Alerter {
+func NewAlerter(ctx context.Context, catalogPool *pgxpool.Pool) *Alerter {
 	if catalogPool == nil {
 		panic("catalog pool is nil for Alerter")
 	}
-
+	snsTopic := peerdbenv.PeerDBTelemetryAWSSNSTopicArn()
+	var snsMessageSender telemetry.Sender
+	if snsTopic != "" {
+		var err error
+		snsMessageSender, err = telemetry.NewSNSMessageSenderWithNewClient(ctx, &telemetry.SNSMessageSenderConfig{
+			Topic: snsTopic,
+		})
+		if err != nil {
+			panic(fmt.Sprintf("unable to set up telemetry for Alerter: %+v", err))
+		}
+		logger.LoggerFromCtx(ctx).Info("Successfully registered telemetry sender")
+	}
 	return &Alerter{
-		catalogPool: catalogPool,
+		catalogPool:     catalogPool,
+		telemetrySender: snsMessageSender,
 	}
 }
 
@@ -193,6 +207,22 @@ func (a *Alerter) checkAndAddAlertToCatalog(ctx context.Context, alertKey string
 	return false
 }
 
+func (a *Alerter) sendTelemetryMessage(ctx context.Context, flowName string, more string, level telemetry.Level) {
+	if a.telemetrySender != nil {
+		details := fmt.Sprintf("[%s] %s", flowName, more)
+		_, err := a.telemetrySender.SendMessage(ctx, details, details, telemetry.Attributes{
+			Level:         level,
+			DeploymentUID: peerdbenv.PeerDBDeploymentUID(),
+			Tags:          []string{flowName},
+			Type:          flowName,
+		})
+		if err != nil {
+			logger.LoggerFromCtx(ctx).Warn("failed to send message to telemetrySender", slog.Any("error", err))
+			return
+		}
+	}
+}
+
 func (a *Alerter) LogFlowError(ctx context.Context, flowName string, err error) {
 	errorWithStack := fmt.Sprintf("%+v", err)
 	_, err = a.catalogPool.Exec(ctx,
@@ -202,6 +232,11 @@ func (a *Alerter) LogFlowError(ctx context.Context, flowName string, err error)
 		logger.LoggerFromCtx(ctx).Warn("failed to insert flow error", slog.Any("error", err))
 		return
 	}
+	a.sendTelemetryMessage(ctx, flowName, errorWithStack, telemetry.ERROR)
+}
+
+func (a *Alerter) LogFlowEvent(ctx context.Context, flowName string, info string) {
+	a.sendTelemetryMessage(ctx, flowName, info, telemetry.INFO)
 }
 
 func (a *Alerter) LogFlowInfo(ctx context.Context, flowName string, info string) {
diff --git a/flow/cmd/snapshot_worker.go b/flow/cmd/snapshot_worker.go
index eb9021de1a..d5b9d4b51f 100644
--- a/flow/cmd/snapshot_worker.go
+++ b/flow/cmd/snapshot_worker.go
@@ -68,7 +68,7 @@ func SnapshotWorkerMain(opts *SnapshotWorkerOptions) (client.Client, worker.Work
 	w.RegisterWorkflow(peerflow.SnapshotFlowWorkflow)
 	w.RegisterActivity(&activities.SnapshotActivity{
 		SnapshotConnections: make(map[string]activities.SlotSnapshotSignal),
-		Alerter:             alerting.NewAlerter(conn),
+		Alerter:             alerting.NewAlerter(context.Background(), conn),
 	})
 
 	return c, w, nil
diff --git a/flow/cmd/worker.go b/flow/cmd/worker.go
index 4014d47596..8977108be7 100644
--- a/flow/cmd/worker.go
+++ b/flow/cmd/worker.go
@@ -120,7 +120,7 @@ func WorkerMain(opts *WorkerOptions) (client.Client, worker.Worker, error) {
 
 	w.RegisterActivity(&activities.FlowableActivity{
 		CatalogPool: conn,
-		Alerter:     alerting.NewAlerter(conn),
+		Alerter:     alerting.NewAlerter(context.Background(), conn),
 		CdcCache:    make(map[string]connectors.CDCPullConnector),
 	})
 
diff --git a/flow/go.mod b/flow/go.mod
index a6ad81e4f7..da03d355b3 100644
--- a/flow/go.mod
+++ b/flow/go.mod
@@ -1,6 +1,6 @@
 module github.com/PeerDB-io/peer-flow
 
-go 1.22
+go 1.22.0
 
 require (
 	cloud.google.com/go v0.112.0
@@ -10,10 +10,12 @@ require (
 	github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs v1.0.3
 	github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/eventhub/armeventhub v1.2.0
 	github.com/ClickHouse/clickhouse-go/v2 
v2.18.0 - github.com/aws/aws-sdk-go-v2 v1.25.0 + github.com/aws/aws-sdk-go-v2 v1.25.2 + github.com/aws/aws-sdk-go-v2/config v1.27.0 github.com/aws/aws-sdk-go-v2/credentials v1.17.0 github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.16.1 github.com/aws/aws-sdk-go-v2/service/s3 v1.50.0 + github.com/aws/aws-sdk-go-v2/service/sns v1.29.1 github.com/cockroachdb/pebble v1.1.0 github.com/google/uuid v1.6.0 github.com/grafana/pyroscope-go v1.1.1 @@ -52,6 +54,11 @@ require ( github.com/DataDog/zstd v1.5.5 // indirect github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c // indirect github.com/apache/arrow/go/v14 v14.0.2 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.15.0 // indirect + github.com/aws/aws-sdk-go-v2/internal/ini v1.8.0 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.19.0 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.22.0 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.27.0 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/cockroachdb/errors v1.11.1 // indirect @@ -103,14 +110,14 @@ require ( github.com/AzureAD/microsoft-authentication-library-for-go v1.2.1 // indirect github.com/andybalholm/brotli v1.1.0 // indirect github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.0 // indirect - github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.0 // indirect - github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.0 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.2 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.2 // indirect github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.0 // indirect github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.0 // indirect github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.3.0 // indirect github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.0 // indirect github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.17.0 // indirect - github.com/aws/smithy-go v1.20.0 // indirect + github.com/aws/smithy-go v1.20.1 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/djherbis/buffer v1.2.0 github.com/djherbis/nio/v3 v3.0.1 diff --git a/flow/go.sum b/flow/go.sum index 47d6c5edf6..fc03edf962 100644 --- a/flow/go.sum +++ b/flow/go.sum @@ -64,8 +64,8 @@ github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer5 github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= github.com/apache/arrow/go/v14 v14.0.2 h1:N8OkaJEOfI3mEZt07BIkvo4sC6XDbL+48MBPWO5IONw= github.com/apache/arrow/go/v14 v14.0.2/go.mod h1:u3fgh3EdgN/YQ8cVQRguVW3R+seMybFg8QBQ5LU+eBY= -github.com/aws/aws-sdk-go-v2 v1.25.0 h1:sv7+1JVJxOu/dD/sz/csHX7jFqmP001TIY7aytBWDSQ= -github.com/aws/aws-sdk-go-v2 v1.25.0/go.mod h1:G104G1Aho5WqF+SR3mDIobTABQzpYV0WxMsKxlMggOA= +github.com/aws/aws-sdk-go-v2 v1.25.2 h1:/uiG1avJRgLGiQM9X3qJM8+Qa6KRGK5rRPuXE0HUM+w= +github.com/aws/aws-sdk-go-v2 v1.25.2/go.mod h1:Evoc5AsmtveRt1komDwIsjHFyrP5tDuF1D1U+6z6pNo= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.0 h1:2UO6/nT1lCZq1LqM67Oa4tdgP1CvL1sLSxvuD+VrOeE= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.0/go.mod h1:5zGj2eA85ClyedTDK+Whsu+w9yimnVIZvhvBKrDquM8= github.com/aws/aws-sdk-go-v2/config v1.27.0 h1:J5sdGCAHuWKIXLeXiqr8II/adSvetkx0qdZwdbXXpb0= @@ -76,10 +76,10 @@ github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.15.0 h1:xWCwjjvVz2ojYTP4kBKUuUh github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.15.0/go.mod 
h1:j3fACuqXg4oMTQOR2yY7m0NmJY0yBK4L4sLsRXq1Ins= github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.16.1 h1:FqtJUSBgT2yfZ8kZhTi9AO131qMLOzb4MiH4riAM8XM= github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.16.1/go.mod h1:G3V4qNUPMHKrXW/l149QXmHjf1vlMWBO4UuGPCK4a/c= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.0 h1:NPs/EqVO+ajwOoq56EfcGKa3L3ruWuazkIw1BqxwOPw= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.0/go.mod h1:D+duLy2ylgatV+yTlQ8JTuLfDD0BnFvnQRc+o6tbZ4M= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.0 h1:ks7KGMVUMoDzcxNWUlEdI+/lokMFD136EL6DWmUOV80= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.0/go.mod h1:hL6BWM/d/qz113fVitZjbXR0E+RCTU1+x+1Idyn5NgE= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.2 h1:bNo4LagzUKbjdxE0tIcR9pMzLR2U/Tgie1Hq1HQ3iH8= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.2/go.mod h1:wRQv0nN6v9wDXuWThpovGQjqF1HFdcgWjporw14lS8k= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.2 h1:EtOU5jsPdIQNP+6Q2C5e3d65NKT1PeCiQk+9OdzO12Q= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.2/go.mod h1:tyF5sKccmDz0Bv4NrstEr+/9YkSPJHrcO7UsUKf7pWM= github.com/aws/aws-sdk-go-v2/internal/ini v1.8.0 h1:hT8rVHwugYE2lEfdFE0QWVo81lF7jMrYJVDWI+f+VxU= github.com/aws/aws-sdk-go-v2/internal/ini v1.8.0/go.mod h1:8tu/lYfQfFe6IGnaOdrpVgEL2IrrDOf6/m9RQum4NkY= github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.0 h1:TkbRExyKSVHELwG9gz2+gql37jjec2R5vus9faTomwE= @@ -94,14 +94,16 @@ github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.17.0 h1:l5puwOHr7IxECu github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.17.0/go.mod h1:Oov79flWa/n7Ni+lQC3z+VM7PoRM47omRqbJU9B5Y7E= github.com/aws/aws-sdk-go-v2/service/s3 v1.50.0 h1:jZAdMD1ioZdqirzzVVRhpHHWJmcGGCn8JqDYBs5nmYA= github.com/aws/aws-sdk-go-v2/service/s3 v1.50.0/go.mod h1:1o/W6JFUuREj2ExoQ21vHJgO7wakvjhol91M9eknFgs= +github.com/aws/aws-sdk-go-v2/service/sns v1.29.1 h1:K2FiR/547lI9vGuDL0Ghin4QPSEvOKxbHY9aXFq8wfU= +github.com/aws/aws-sdk-go-v2/service/sns v1.29.1/go.mod h1:PBmfgVv83oBgZVFhs/+oWsL6r0hLyB6qHRFEWwHyHn4= github.com/aws/aws-sdk-go-v2/service/sso v1.19.0 h1:u6OkVDxtBPnxPkZ9/63ynEe+8kHbtS5IfaC4PzVxzWM= github.com/aws/aws-sdk-go-v2/service/sso v1.19.0/go.mod h1:YqbU3RS/pkDVu+v+Nwxvn0i1WB0HkNWEePWbmODEbbs= github.com/aws/aws-sdk-go-v2/service/ssooidc v1.22.0 h1:6DL0qu5+315wbsAEEmzK+P9leRwNbkp+lGjPC+CEvb8= github.com/aws/aws-sdk-go-v2/service/ssooidc v1.22.0/go.mod h1:olUAyg+FaoFaL/zFaeQQONjOZ9HXoxgvI/c7mQTYz7M= github.com/aws/aws-sdk-go-v2/service/sts v1.27.0 h1:cjTRjh700H36MQ8M0LnDn33W3JmwC77mdxIIyPWCdpM= github.com/aws/aws-sdk-go-v2/service/sts v1.27.0/go.mod h1:nXfOBMWPokIbOY+Gi7a1psWMSvskUCemZzI+SMB7Akc= -github.com/aws/smithy-go v1.20.0 h1:6+kZsCXZwKxZS9RfISnPc4EXlHoyAkm2hPuM8X2BrrQ= -github.com/aws/smithy-go v1.20.0/go.mod h1:uo5RKksAl4PzhqaAbjd4rLgFoq5koTsQKYuGe7dklGc= +github.com/aws/smithy-go v1.20.1 h1:4SZlSlMr36UEqC7XOyRVb27XMeZubNcBNN+9IgEPIQw= +github.com/aws/smithy-go v1.20.1/go.mod h1:krry+ya/rV9RDcV/Q16kpu6ypI4K2czasz0NC3qS14E= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= diff --git a/flow/peerdbenv/config.go b/flow/peerdbenv/config.go index f6128e56a7..9a59bad5ef 100644 --- a/flow/peerdbenv/config.go +++ b/flow/peerdbenv/config.go @@ -90,3 +90,8 @@ func PeerDBEnableWALHeartbeat() bool { func PeerDBEnableParallelSyncNormalize() bool { return 
getEnvBool("PEERDB_ENABLE_PARALLEL_SYNC_NORMALIZE", false) } + +// PEERDB_TELEMETRY_AWS_SNS_TOPIC_ARN +func PeerDBTelemetryAWSSNSTopicArn() string { + return getEnvString("PEERDB_TELEMETRY_AWS_SNS_TOPIC_ARN", "") +} diff --git a/flow/shared/telemetry/interface.go b/flow/shared/telemetry/interface.go new file mode 100644 index 0000000000..6ee7d6f391 --- /dev/null +++ b/flow/shared/telemetry/interface.go @@ -0,0 +1,25 @@ +package telemetry + +import ( + "context" +) + +type Sender interface { + SendMessage(ctx context.Context, subject string, body string, attributes Attributes) (*string, error) +} + +type Attributes struct { + Level Level + DeploymentUID string + Tags []string + Type string +} + +type Level string + +const ( + INFO Level = "INFO" + WARN Level = "WARN" + ERROR Level = "ERROR" + CRITICAL Level = "CRITICAL" +) diff --git a/flow/shared/telemetry/sns_message_sender.go b/flow/shared/telemetry/sns_message_sender.go new file mode 100644 index 0000000000..42bdd026a7 --- /dev/null +++ b/flow/shared/telemetry/sns_message_sender.go @@ -0,0 +1,110 @@ +package telemetry + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "strings" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/service/sns" + "github.com/aws/aws-sdk-go-v2/service/sns/types" + "go.temporal.io/sdk/activity" +) + +type SNSMessageSender interface { + Sender +} + +type SNSMessageSenderImpl struct { + client *sns.Client + topic string +} + +type SNSMessageSenderConfig struct { + Topic string `json:"topic"` +} + +func (s *SNSMessageSenderImpl) SendMessage(ctx context.Context, subject string, body string, attributes Attributes) (*string, error) { + activityInfo := activity.GetInfo(ctx) + deduplicationString := strings.Join([]string{ + "deployID", attributes.DeploymentUID, + "subject", subject, + "runID", activityInfo.WorkflowExecution.RunID, + "activityName", activityInfo.ActivityType.Name, + }, " || ") + h := sha256.New() + h.Write([]byte(deduplicationString)) + deduplicationHash := hex.EncodeToString(h.Sum(nil)) + + publish, err := s.client.Publish(ctx, &sns.PublishInput{ + Message: aws.String(body), + MessageAttributes: map[string]types.MessageAttributeValue{ + "level": { + DataType: aws.String("String"), + StringValue: aws.String(string(attributes.Level)), + }, + "tags": { + DataType: aws.String("String"), + StringValue: aws.String(strings.Join(attributes.Tags, ",")), + }, + "deploymentUUID": { + DataType: aws.String("String"), + StringValue: aws.String(attributes.DeploymentUID), + }, + "entity": { + DataType: aws.String("String"), + StringValue: aws.String(attributes.DeploymentUID), + }, + "type": { + DataType: aws.String("String"), + StringValue: aws.String(attributes.Type), + }, + "alias": { // This will act as a de-duplication ID + DataType: aws.String("String"), + StringValue: aws.String(deduplicationHash), + }, + }, + Subject: aws.String(subject[:100]), + TopicArn: aws.String(s.topic), + }) + if err != nil { + return nil, err + } + return publish.MessageId, nil +} + +func NewSNSMessageSenderWithNewClient(ctx context.Context, config *SNSMessageSenderConfig) (SNSMessageSender, error) { + // Topic Region must match client region + region := strings.Split(strings.TrimPrefix(config.Topic, "arn:aws:sns:"), ":")[0] + client, err := newSnsClient(ctx, ®ion) + if err != nil { + return nil, err + } + return &SNSMessageSenderImpl{ + client: client, + topic: config.Topic, + }, nil +} + +func NewSNSMessageSender(client *sns.Client, config *SNSMessageSenderConfig) 
SNSMessageSender { + return &SNSMessageSenderImpl{ + client: client, + topic: config.Topic, + } +} + +func newSnsClient(ctx context.Context, region *string) (*sns.Client, error) { + sdkConfig, err := config.LoadDefaultConfig(ctx, func(options *config.LoadOptions) error { + if region != nil { + options.Region = *region + } + return nil + }) + if err != nil { + return nil, err + } + snsClient := sns.NewFromConfig(sdkConfig) + return snsClient, nil +}
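
A note on the gCtx.Err() check added for Snowflake normalize: errgroup.Group.Go always runs the supplied function once a worker slot frees up, even after an earlier goroutine has failed; only the derived context is canceled. Checking gCtx.Err() at the top of the loop is what lets a paused or failed normalize stop scheduling further merges. A minimal sketch of the pattern follows, with mergeTable as a hypothetical stand-in for the per-table MERGE statement:

package main

import (
	"context"
	"fmt"

	"golang.org/x/sync/errgroup"
)

// mergeTable is a hypothetical stand-in for the per-table merge.
func mergeTable(ctx context.Context, table string) error {
	fmt.Println("merging", table)
	return nil
}

func normalizeAll(ctx context.Context, tables []string) error {
	g, gCtx := errgroup.WithContext(ctx)
	g.SetLimit(8) // limit parallel merges to 8, mirroring the connector
	for _, table := range tables {
		// Bail out early: g.Go still runs the closure after a failure has
		// canceled gCtx, so the loop must inspect the context itself.
		if err := gCtx.Err(); err != nil {
			return fmt.Errorf("canceled while normalizing records: %w", err)
		}
		g.Go(func() error {
			return mergeTable(gCtx, table)
		})
	}
	return g.Wait()
}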
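
For the telemetry sender in the final patch, a minimal usage sketch under stated assumptions: it assumes PEERDB_TELEMETRY_AWS_SNS_TOPIC_ARN is configured and, importantly, that ctx is a Temporal activity context, since SendMessage calls activity.GetInfo(ctx) to build its de-duplication key. The emitEvent helper, the example ARN, and the literal deployment UID are illustrative only; in the patch this wiring lives in alerting.NewAlerter and Alerter.sendTelemetryMessage.

package main

import (
	"context"

	"github.com/PeerDB-io/peer-flow/shared/telemetry"
)

// emitEvent is a hypothetical helper showing how the pieces fit together.
func emitEvent(ctx context.Context, topicArn string, flowName string) error {
	sender, err := telemetry.NewSNSMessageSenderWithNewClient(ctx, &telemetry.SNSMessageSenderConfig{
		Topic: topicArn, // e.g. arn:aws:sns:us-east-1:123456789012:peerdb-alerts; the region is parsed from the ARN
	})
	if err != nil {
		return err
	}
	subject := "Started Snapshot Flow Job - " + flowName
	// ctx must come from a Temporal activity: SendMessage reads
	// activity.GetInfo(ctx) for its SNS "alias" de-duplication attribute.
	_, err = sender.SendMessage(ctx, subject, subject, telemetry.Attributes{
		Level:         telemetry.INFO,
		DeploymentUID: "deployment-uid", // normally peerdbenv.PeerDBDeploymentUID()
		Tags:          []string{flowName},
		Type:          flowName,
	})
	return err
}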