From 87a54a1b71a4af88839c34cd1529e8f3995874da Mon Sep 17 00:00:00 2001
From: Kevin K Biju <52661649+heavycrystal@users.noreply.github.com>
Date: Wed, 30 Aug 2023 23:37:05 +0530
Subject: [PATCH] optimizing flow tests by separating and parallelizing test
 suites (#356)

1. Split each connector into its own test suite, and rewrote the tests to
   allow running multiple suites in parallel.
2. Better separation of the source PG instance [shared by all test suites of
   a run] and the destination BQ and SF instances [shared by all runs].
3. Some code cleanup and fixing up some tests.
4. Switched to using a vanilla PG instance, configured with higher logical
   replication limits.
---
 .github/workflows/flow.yml                    |   20 +-
 flow/e2e/{ => bigquery}/bigquery_helper.go    |    5 +-
 flow/e2e/bigquery/peer_flow_bq_test.go        |  925 +++++++++
 flow/e2e/bigquery/qrep_flow_bq_test.go        |  123 ++
 flow/e2e/congen.go                            |   99 +
 flow/e2e/{ => eventhub}/eventhub_helper.go    |    2 +-
 flow/e2e/{ => eventhub}/peer_flow_eh_test.go  |   74 +-
 flow/e2e/peer_flow_s3_test.go                 |  153 --
 flow/e2e/peer_flow_test.go                    | 1809 -----------------
 flow/e2e/postgres/qrep_flow_pg_test.go        |  147 ++
 flow/e2e/qrep_flow_test.go                    |  625 ------
 flow/e2e/s3/qrep_flow_s3_test.go              |  183 ++
 flow/e2e/{ => s3}/s3_helper.go                |    2 +-
 flow/e2e/snowflake/peer_flow_sf_test.go       |  800 ++++++++
 flow/e2e/snowflake/qrep_flow_sf_test.go       |  232 +++
 flow/e2e/snowflake/snowflake_helper.go        |  131 ++
 flow/e2e/snowflake_helper.go                  |  104 -
 .../qrep_flow_sqlserver_test.go               |  102 +-
 flow/e2e/{ => sqlserver}/sqlserver_helper.go  |    2 +-
 flow/e2e/test_utils.go                        |  290 ++-
 20 files changed, 3098 insertions(+), 2730 deletions(-)
 rename flow/e2e/{ => bigquery}/bigquery_helper.go (99%)
 create mode 100644 flow/e2e/bigquery/peer_flow_bq_test.go
 create mode 100644 flow/e2e/bigquery/qrep_flow_bq_test.go
 rename flow/e2e/{ => eventhub}/eventhub_helper.go (99%)
 rename flow/e2e/{ => eventhub}/peer_flow_eh_test.go (57%)
 delete mode 100644 flow/e2e/peer_flow_s3_test.go
 delete mode 100644 flow/e2e/peer_flow_test.go
 create mode 100644 flow/e2e/postgres/qrep_flow_pg_test.go
 delete mode 100644 flow/e2e/qrep_flow_test.go
 create mode 100644 flow/e2e/s3/qrep_flow_s3_test.go
 rename flow/e2e/{ => s3}/s3_helper.go (99%)
 create mode 100644 flow/e2e/snowflake/peer_flow_sf_test.go
 create mode 100644 flow/e2e/snowflake/qrep_flow_sf_test.go
 create mode 100644 flow/e2e/snowflake/snowflake_helper.go
 delete mode 100644 flow/e2e/snowflake_helper.go
 rename flow/e2e/{ => sqlserver}/qrep_flow_sqlserver_test.go (53%)
 rename flow/e2e/{ => sqlserver}/sqlserver_helper.go (99%)

diff --git a/.github/workflows/flow.yml b/.github/workflows/flow.yml
index b7e2085b4..5f1623eba 100644
--- a/.github/workflows/flow.yml
+++ b/.github/workflows/flow.yml
@@ -1,10 +1,10 @@
 name: Flow build and test
 
 on:
-  push:
-    branches: [main]
   pull_request:
     branches: [main]
+  push:
+    branches: [main]
 
 jobs:
   flow_test:
     timeout-minutes: 30
     services:
       pg_cdc:
-        image: debezium/postgres:14-alpine
+        image: postgres:15.4-alpine
         ports:
           - 7132:5432
         env:
           POSTGRES_USER: postgres
           POSTGRES_PASSWORD: postgres
           POSTGRES_DB: postgres
         options: >-
+          --name pg_cdc
           --health-cmd pg_isready
           --health-interval 10s
           --health-timeout 5s
           --health-retries 5
@@ -55,18 +56,21 @@
           name: "snowflake_creds.json"
           json: ${{ secrets.SNOWFLAKE_GH_CI_PKEY }}
 
-      - name: create hstore extension
+      - name: create hstore extension and increase logical replication limits
        run: |
-          sudo apt-get update
-          sudo apt-get install -y postgresql-client
-          psql -h localhost -p 7132 -U postgres -c "CREATE EXTENSION hstore;"
+          docker exec 
pg_cdc psql -h localhost -p 5432 -U postgres -c "CREATE EXTENSION hstore;" + docker exec pg_cdc psql -h localhost -p 5432 -U postgres -c "ALTER SYSTEM SET wal_level=logical;" + docker exec pg_cdc psql -h localhost -p 5432 -U postgres -c "ALTER SYSTEM SET max_replication_slots=100;" + docker exec pg_cdc psql -h localhost -p 5432 -U postgres -c "ALTER SYSTEM SET max_wal_senders=100;" + docker restart pg_cdc working-directory: ./flow env: + PG_CDC: PGPASSWORD: postgres - name: run tests run: | - gotestsum --format testname -- -p 1 ./... -timeout 1200s + gotestsum --format testname -- -p 4 ./... -timeout 1200s working-directory: ./flow env: AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} diff --git a/flow/e2e/bigquery_helper.go b/flow/e2e/bigquery/bigquery_helper.go similarity index 99% rename from flow/e2e/bigquery_helper.go rename to flow/e2e/bigquery/bigquery_helper.go index 5e9083ec9..72e8b3ede 100644 --- a/flow/e2e/bigquery_helper.go +++ b/flow/e2e/bigquery/bigquery_helper.go @@ -1,4 +1,4 @@ -package e2e +package e2e_bigquery import ( "context" @@ -12,6 +12,7 @@ import ( "cloud.google.com/go/bigquery" "cloud.google.com/go/civil" peer_bq "github.com/PeerDB-io/peer-flow/connectors/bigquery" + "github.com/PeerDB-io/peer-flow/e2e" "github.com/PeerDB-io/peer-flow/generated/protos" "github.com/PeerDB-io/peer-flow/model" "github.com/PeerDB-io/peer-flow/model/qvalue" @@ -45,7 +46,7 @@ func NewBigQueryTestHelper() (*BigQueryTestHelper, error) { return nil, fmt.Errorf("TEST_BQ_CREDS env var not set") } - content, err := readFileToBytes(jsonPath) + content, err := e2e.ReadFileToBytes(jsonPath) if err != nil { return nil, fmt.Errorf("failed to read file: %w", err) } diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go new file mode 100644 index 000000000..655a31785 --- /dev/null +++ b/flow/e2e/bigquery/peer_flow_bq_test.go @@ -0,0 +1,925 @@ +package e2e_bigquery + +import ( + "context" + "fmt" + "testing" + + "github.com/PeerDB-io/peer-flow/e2e" + "github.com/PeerDB-io/peer-flow/generated/protos" + peerflow "github.com/PeerDB-io/peer-flow/workflows" + "github.com/jackc/pgx/v5/pgxpool" + "github.com/joho/godotenv" + log "github.com/sirupsen/logrus" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + "go.temporal.io/sdk/testsuite" +) + +const bigquerySuffix = "bigquery" + +type PeerFlowE2ETestSuiteBQ struct { + suite.Suite + testsuite.WorkflowTestSuite + + pool *pgxpool.Pool + bqHelper *BigQueryTestHelper +} + +func TestPeerFlowE2ETestSuiteBQ(t *testing.T) { + suite.Run(t, new(PeerFlowE2ETestSuiteBQ)) +} + +func (s *PeerFlowE2ETestSuiteBQ) attachSchemaSuffix(tableName string) string { + return fmt.Sprintf("e2e_test_%s.%s", bigquerySuffix, tableName) +} + +func (s *PeerFlowE2ETestSuiteBQ) attachSuffix(input string) string { + return fmt.Sprintf("%s_%s", input, bigquerySuffix) +} + +// setupBigQuery sets up the bigquery connection. 
+func (s *PeerFlowE2ETestSuiteBQ) setupBigQuery() error { + bqHelper, err := NewBigQueryTestHelper() + if err != nil { + return fmt.Errorf("failed to create bigquery helper: %w", err) + } + + err = bqHelper.RecreateDataset() + if err != nil { + return fmt.Errorf("failed to recreate bigquery dataset: %w", err) + } + + s.bqHelper = bqHelper + return nil +} + +// Implement SetupAllSuite interface to setup the test suite +func (s *PeerFlowE2ETestSuiteBQ) SetupSuite() { + err := godotenv.Load() + if err != nil { + // it's okay if the .env file is not present + // we will use the default values + log.Infof("Unable to load .env file, using default values from env") + } + + log.SetReportCaller(true) + + pool, err := e2e.SetupPostgres(bigquerySuffix) + if err != nil { + s.Fail("failed to setup postgres", err) + } + s.pool = pool + + err = s.setupBigQuery() + if err != nil { + s.Fail("failed to setup bigquery", err) + } +} + +// Implement TearDownAllSuite interface to tear down the test suite +func (s *PeerFlowE2ETestSuiteBQ) TearDownSuite() { + err := e2e.TearDownPostgres(s.pool, bigquerySuffix) + if err != nil { + s.Fail("failed to drop Postgres schema", err) + } + + err = s.bqHelper.DropDataset() + if err != nil { + s.Fail("failed to drop bigquery dataset", err) + } +} + +func (s *PeerFlowE2ETestSuiteBQ) Test_Invalid_Connection_Config() { + env := s.NewTestWorkflowEnvironment() + e2e.RegisterWorkflowsAndActivities(env) + + // TODO (kaushikiska): ensure flow name can only be alpha numeric and underscores. + limits := peerflow.PeerFlowLimits{ + TotalSyncFlows: 1, + MaxBatchSize: 1, + } + + env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, nil, &limits, nil) + + // Verify workflow completes + s.True(env.IsWorkflowCompleted()) + err := env.GetWorkflowError() + + // assert that error contains "invalid connection configs" + s.Error(err) + s.Contains(err.Error(), "invalid connection configs") + + env.AssertExpectations(s.T()) +} + +func (s *PeerFlowE2ETestSuiteBQ) Test_Complete_Flow_No_Data() { + env := s.NewTestWorkflowEnvironment() + e2e.RegisterWorkflowsAndActivities(env) + + srcTableName := s.attachSchemaSuffix("test_no_data") + dstTableName := "test_no_data" + + _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` + CREATE TABLE IF NOT EXISTS %s ( + id SERIAL PRIMARY KEY, + key TEXT NOT NULL, + value VARCHAR(255) NOT NULL + ); + `, srcTableName)) + s.NoError(err) + + connectionGen := e2e.FlowConnectionGenerationConfig{ + FlowJobName: s.attachSuffix("test_complete_flow_no_data"), + TableNameMapping: map[string]string{srcTableName: dstTableName}, + PostgresPort: e2e.PostgresPort, + Destination: s.bqHelper.Peer, + } + + flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() + s.NoError(err) + + limits := peerflow.PeerFlowLimits{ + TotalSyncFlows: 1, + MaxBatchSize: 1, + } + + env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil) + + // Verify workflow completes without error + s.True(env.IsWorkflowCompleted()) + err = env.GetWorkflowError() + + // allow only continue as new error + s.Error(err) + s.Contains(err.Error(), "continue as new") + + env.AssertExpectations(s.T()) +} + +func (s *PeerFlowE2ETestSuiteBQ) Test_Char_ColType_Error() { + env := s.NewTestWorkflowEnvironment() + e2e.RegisterWorkflowsAndActivities(env) + + srcTableName := s.attachSchemaSuffix("test_char_coltype") + dstTableName := "test_char_coltype" + + _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` + CREATE TABLE IF NOT EXISTS %s ( + id SERIAL PRIMARY KEY, + key 
TEXT NOT NULL, + value CHAR(255) NOT NULL + ); + `, srcTableName)) + s.NoError(err) + + connectionGen := e2e.FlowConnectionGenerationConfig{ + FlowJobName: s.attachSuffix("test_char_table"), + TableNameMapping: map[string]string{srcTableName: dstTableName}, + PostgresPort: e2e.PostgresPort, + Destination: s.bqHelper.Peer, + } + + flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() + s.NoError(err) + + limits := peerflow.PeerFlowLimits{ + TotalSyncFlows: 1, + MaxBatchSize: 1, + } + + env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil) + + // Verify workflow completes without error + s.True(env.IsWorkflowCompleted()) + err = env.GetWorkflowError() + + // allow only continue as new error + s.Error(err) + s.Contains(err.Error(), "continue as new") + + env.AssertExpectations(s.T()) +} + +// Test_Complete_Simple_Flow_BQ tests a complete flow with data in the source table. +// The test inserts 10 rows into the source table and verifies that the data is +// correctly synced to the destination table after sync flow completes. +func (s *PeerFlowE2ETestSuiteBQ) Test_Complete_Simple_Flow_BQ() { + env := s.NewTestWorkflowEnvironment() + e2e.RegisterWorkflowsAndActivities(env) + + srcTableName := s.attachSchemaSuffix("test_simple_flow_bq") + dstTableName := "test_simple_flow_bq" + + _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` + CREATE TABLE IF NOT EXISTS %s ( + id SERIAL PRIMARY KEY, + key TEXT NOT NULL, + value TEXT NOT NULL + ); + `, srcTableName)) + s.NoError(err) + + connectionGen := e2e.FlowConnectionGenerationConfig{ + FlowJobName: s.attachSuffix("test_complete_simple_flow"), + TableNameMapping: map[string]string{srcTableName: dstTableName}, + PostgresPort: e2e.PostgresPort, + Destination: s.bqHelper.Peer, + } + + flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() + s.NoError(err) + + limits := peerflow.PeerFlowLimits{ + TotalSyncFlows: 2, + MaxBatchSize: 100, + } + + // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // and then insert 10 rows into the source table + go func() { + e2e.SetupPeerFlowStatusQuery(env, connectionGen) + // insert 10 rows into the source table + for i := 0; i < 10; i++ { + testKey := fmt.Sprintf("test_key_%d", i) + testValue := fmt.Sprintf("test_value_%d", i) + _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` + INSERT INTO %s(key, value) VALUES ($1, $2) + `, srcTableName), testKey, testValue) + s.NoError(err) + } + fmt.Println("Inserted 10 rows into the source table") + }() + + env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil) + + // Verify workflow completes without error + s.True(env.IsWorkflowCompleted()) + err = env.GetWorkflowError() + + // allow only continue as new error + s.Error(err) + s.Contains(err.Error(), "continue as new") + + count, err := s.bqHelper.CountRows(dstTableName) + s.NoError(err) + s.Equal(10, count) + + // TODO: verify that the data is correctly synced to the destination table + // on the bigquery side + + env.AssertExpectations(s.T()) +} + +func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_BQ() { + env := s.NewTestWorkflowEnvironment() + e2e.RegisterWorkflowsAndActivities(env) + + srcTableName := s.attachSchemaSuffix("test_toast_bq_1") + dstTableName := "test_toast_bq_1" + + _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` + CREATE TABLE IF NOT EXISTS %s ( + id SERIAL PRIMARY KEY, + t1 text, + t2 text, + k int + );CREATE OR REPLACE FUNCTION random_string( int ) RETURNS TEXT as $$ + SELECT 
string_agg(substring('0123456789bcdfghjkmnpqrstvwxyz',
+	round(random() * 30)::integer, 1), '') FROM generate_series(1, $1);
+	$$ language sql;
+	`, srcTableName))
+	s.NoError(err)
+
+	connectionGen := e2e.FlowConnectionGenerationConfig{
+		FlowJobName:      s.attachSuffix("test_toast_bq_1"),
+		TableNameMapping: map[string]string{srcTableName: dstTableName},
+		PostgresPort:     e2e.PostgresPort,
+		Destination:      s.bqHelper.Peer,
+	}
+
+	flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs()
+	s.NoError(err)
+
+	limits := peerflow.PeerFlowLimits{
+		TotalSyncFlows: 1,
+		MaxBatchSize:   100,
+	}
+
+	// in a separate goroutine, wait for PeerFlowStatusQuery to finish setup
+	// and execute a transaction touching toast columns
+	go func() {
+		e2e.SetupPeerFlowStatusQuery(env, connectionGen)
+		/*
+			Executing a transaction which
+			1. changes both toast columns
+			2. changes no toast column
+			3. changes 1 toast column
+		*/
+		_, err = s.pool.Exec(context.Background(), fmt.Sprintf(`
+			BEGIN;
+			INSERT INTO %s(t1,t2,k) SELECT random_string(9000),random_string(9000),
+			1 FROM generate_series(1,2);
+			UPDATE %s SET k=102 WHERE id=1;
+			UPDATE %s SET t1='dummy' WHERE id=2;
+			END;
+		`, srcTableName, srcTableName, srcTableName))
+		s.NoError(err)
+		fmt.Println("Executed a transaction touching toast columns")
+	}()
+
+	env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil)
+
+	// Verify workflow completes without error
+	s.True(env.IsWorkflowCompleted())
+	err = env.GetWorkflowError()
+
+	// allow only continue as new error
+	s.Error(err)
+	s.Contains(err.Error(), "continue as new")
+
+	s.compareTableContentsBQ(dstTableName, "id,t1,t2,k")
+	env.AssertExpectations(s.T())
+}
+
+func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_Nochanges_BQ() {
+	env := s.NewTestWorkflowEnvironment()
+	e2e.RegisterWorkflowsAndActivities(env)
+
+	srcTableName := s.attachSchemaSuffix("test_toast_bq_2")
+	dstTableName := "test_toast_bq_2"
+
+	_, err := s.pool.Exec(context.Background(), fmt.Sprintf(`
+		CREATE TABLE IF NOT EXISTS %s (
+			id SERIAL PRIMARY KEY,
+			t1 text,
+			t2 text,
+			k int
+		);CREATE OR REPLACE FUNCTION random_string( int ) RETURNS TEXT as $$
+		SELECT string_agg(substring('0123456789bcdfghjkmnpqrstvwxyz',
+		round(random() * 30)::integer, 1), '') FROM generate_series(1, $1);
+		$$ language sql;
+	`, srcTableName))
+	s.NoError(err)
+
+	connectionGen := e2e.FlowConnectionGenerationConfig{
+		FlowJobName:      s.attachSuffix("test_toast_bq_2"),
+		TableNameMapping: map[string]string{srcTableName: dstTableName},
+		PostgresPort:     e2e.PostgresPort,
+		Destination:      s.bqHelper.Peer,
+	}
+
+	flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs()
+	s.NoError(err)
+
+	limits := peerflow.PeerFlowLimits{
+		TotalSyncFlows: 1,
+		MaxBatchSize:   100,
+	}
+
+	// in a separate goroutine, wait for PeerFlowStatusQuery to finish setup
+	// and execute a transaction touching toast columns
+	go func() {
+		e2e.SetupPeerFlowStatusQuery(env, connectionGen)
+		/* transaction updating no rows */
+		_, err = s.pool.Exec(context.Background(), fmt.Sprintf(`
+			BEGIN;
+			UPDATE %s SET k=102 WHERE id=1;
+			UPDATE %s SET t1='dummy' WHERE id=2;
+			END;
+		`, srcTableName, srcTableName))
+		s.NoError(err)
+		fmt.Println("Executed a transaction touching toast columns")
+	}()
+
+	env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil)
+
+	// Verify workflow completes without error
+	s.True(env.IsWorkflowCompleted())
+	err = env.GetWorkflowError()
+
+	// allow only continue as new error
+	s.Error(err)
+	
s.Contains(err.Error(), "continue as new") + + s.compareTableContentsBQ(dstTableName, "id,t1,t2,k") + env.AssertExpectations(s.T()) +} + +func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_1_BQ() { + env := s.NewTestWorkflowEnvironment() + e2e.RegisterWorkflowsAndActivities(env) + + srcTableName := s.attachSchemaSuffix("test_toast_bq_3") + dstTableName := "test_toast_bq_3" + + _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` + CREATE TABLE IF NOT EXISTS %s ( + id SERIAL PRIMARY KEY, + t1 text, + t2 text, + k int + );CREATE OR REPLACE FUNCTION random_string( int ) RETURNS TEXT as $$ + SELECT string_agg(substring('0123456789bcdfghjkmnpqrstvwxyz', + round(random() * 30)::integer, 1), '') FROM generate_series(1, $1); + $$ language sql; + `, srcTableName)) + s.NoError(err) + + connectionGen := e2e.FlowConnectionGenerationConfig{ + FlowJobName: s.attachSuffix("test_toast_bq_3"), + TableNameMapping: map[string]string{srcTableName: dstTableName}, + PostgresPort: e2e.PostgresPort, + Destination: s.bqHelper.Peer, + } + + flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() + s.NoError(err) + + limits := peerflow.PeerFlowLimits{ + TotalSyncFlows: 1, + MaxBatchSize: 100, + } + + // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // and execute a transaction touching toast columns + go func() { + e2e.SetupPeerFlowStatusQuery(env, connectionGen) + //complex transaction with random DMLs on a table with toast columns + _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` + BEGIN; + INSERT INTO %s(t1,t2,k) SELECT random_string(9000),random_string(9000), + 1 FROM generate_series(1,2); + UPDATE %s SET k=102 WHERE id=1; + UPDATE %s SET t1='dummy' WHERE id=2; + UPDATE %s SET t2='dummy' WHERE id=2; + DELETE FROM %s WHERE id=1; + INSERT INTO %s(t1,t2,k) SELECT random_string(9000),random_string(9000), + 1 FROM generate_series(1,2); + UPDATE %s SET k=1 WHERE id=1; + UPDATE %s SET t1='dummy1',t2='dummy2' WHERE id=1; + UPDATE %s SET t1='dummy3' WHERE id=3; + DELETE FROM %s WHERE id=2; + DELETE FROM %s WHERE id=3; + DELETE FROM %s WHERE id=2; + END; + `, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, + srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName)) + s.NoError(err) + fmt.Println("Executed a transaction touching toast columns") + }() + + env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil) + + // Verify workflow completes without error + s.True(env.IsWorkflowCompleted()) + err = env.GetWorkflowError() + + // allow only continue as new error + s.Error(err) + s.Contains(err.Error(), "continue as new") + + s.compareTableContentsBQ(dstTableName, "id,t1,t2,k") + env.AssertExpectations(s.T()) +} + +func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_2_BQ() { + env := s.NewTestWorkflowEnvironment() + e2e.RegisterWorkflowsAndActivities(env) + + srcTableName := s.attachSchemaSuffix("test_toast_bq_4") + dstTableName := "test_toast_bq_4" + + _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` + CREATE TABLE %s ( + id SERIAL PRIMARY KEY, + t1 text, + k int + );CREATE OR REPLACE FUNCTION random_string( int ) RETURNS TEXT as $$ + SELECT string_agg(substring('0123456789bcdfghjkmnpqrstvwxyz', + round(random() * 30)::integer, 1), '') FROM generate_series(1, $1); + $$ language sql; + `, srcTableName)) + s.NoError(err) + + connectionGen := e2e.FlowConnectionGenerationConfig{ + FlowJobName: s.attachSuffix("test_toast_bq_4"), + TableNameMapping: 
map[string]string{srcTableName: dstTableName}, + PostgresPort: e2e.PostgresPort, + Destination: s.bqHelper.Peer, + } + + flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() + s.NoError(err) + + limits := peerflow.PeerFlowLimits{ + TotalSyncFlows: 1, + MaxBatchSize: 100, + } + + // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // and execute a transaction touching toast columns + go func() { + e2e.SetupPeerFlowStatusQuery(env, connectionGen) + //complex transaction with random DMLs on a table with toast columns + _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` + BEGIN; + INSERT INTO %s(t1,k) SELECT random_string(9000), + 1 FROM generate_series(1,1); + UPDATE %s SET t1=sub.t1 FROM (SELECT random_string(9000) t1 + FROM generate_series(1,1) ) sub WHERE id=1; + UPDATE %s SET k=2 WHERE id=1; + UPDATE %s SET k=3 WHERE id=1; + UPDATE %s SET t1=sub.t1 FROM (SELECT random_string(9000) t1 + FROM generate_series(1,1)) sub WHERE id=1; + UPDATE %s SET k=4 WHERE id=1; + END; + `, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName)) + s.NoError(err) + fmt.Println("Executed a transaction touching toast columns") + }() + + env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil) + + // Verify workflow completes without error + s.True(env.IsWorkflowCompleted()) + err = env.GetWorkflowError() + + // allow only continue as new error + s.Error(err) + s.Contains(err.Error(), "continue as new") + + s.compareTableContentsBQ(dstTableName, "id,t1,k") + env.AssertExpectations(s.T()) +} + +func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_3_BQ() { + env := s.NewTestWorkflowEnvironment() + e2e.RegisterWorkflowsAndActivities(env) + + srcTableName := s.attachSchemaSuffix("test_toast_bq_5") + dstTableName := "test_toast_bq_5" + + _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` + CREATE TABLE IF NOT EXISTS %s ( + id SERIAL PRIMARY KEY, + t1 text, + t2 text, + k int + );CREATE OR REPLACE FUNCTION random_string( int ) RETURNS TEXT as $$ + SELECT string_agg(substring('0123456789bcdfghjkmnpqrstvwxyz', + round(random() * 30)::integer, 1), '') FROM generate_series(1, $1); + $$ language sql; + `, srcTableName)) + s.NoError(err) + + connectionGen := e2e.FlowConnectionGenerationConfig{ + FlowJobName: s.attachSuffix("test_toast_bq_5"), + TableNameMapping: map[string]string{srcTableName: dstTableName}, + PostgresPort: e2e.PostgresPort, + Destination: s.bqHelper.Peer, + } + + flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() + s.NoError(err) + + limits := peerflow.PeerFlowLimits{ + TotalSyncFlows: 1, + MaxBatchSize: 100, + } + + // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // and execute a transaction touching toast columns + go func() { + e2e.SetupPeerFlowStatusQuery(env, connectionGen) + /* + transaction updating a single row + multiple times with changed/unchanged toast columns + */ + _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` + BEGIN; + INSERT INTO %s(t1,t2,k) SELECT random_string(9000),random_string(9000), + 1 FROM generate_series(1,1); + UPDATE %s SET k=102 WHERE id=1; + UPDATE %s SET t1='dummy' WHERE id=1; + UPDATE %s SET t2='dummy' WHERE id=1; + END; + `, srcTableName, srcTableName, srcTableName, srcTableName)) + s.NoError(err) + fmt.Println("Executed a transaction touching toast columns") + }() + + env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil) + + // Verify workflow completes without error + 
s.True(env.IsWorkflowCompleted())
+	err = env.GetWorkflowError()
+
+	// allow only continue as new error
+	s.Error(err)
+	s.Contains(err.Error(), "continue as new")
+
+	s.compareTableContentsBQ(dstTableName, "id,t1,t2,k")
+	env.AssertExpectations(s.T())
+}
+
+func (s *PeerFlowE2ETestSuiteBQ) Test_Types_BQ() {
+	env := s.NewTestWorkflowEnvironment()
+	e2e.RegisterWorkflowsAndActivities(env)
+
+	srcTableName := s.attachSchemaSuffix("test_types_bq")
+	dstTableName := "test_types_bq"
+
+	_, err := s.pool.Exec(context.Background(), fmt.Sprintf(`
+	CREATE TABLE IF NOT EXISTS %s (id serial PRIMARY KEY,c1 BIGINT,c2 BIT,c3 VARBIT,c4 BOOLEAN,
+	c6 BYTEA,c7 CHARACTER,c8 varchar,c9 CIDR,c11 DATE,c12 FLOAT,c13 DOUBLE PRECISION,
+	c14 INET,c15 INTEGER,c16 INTERVAL,c17 JSON,c18 JSONB,c21 MACADDR,c22 MONEY,
+	c23 NUMERIC,c24 OID,c28 REAL,c29 SMALLINT,c30 SMALLSERIAL,c31 SERIAL,c32 TEXT,
+	c33 TIMESTAMP,c34 TIMESTAMPTZ,c35 TIME, c36 TIMETZ,c37 TSQUERY,c38 TSVECTOR,
+	c39 TXID_SNAPSHOT,c40 UUID,c41 XML, c42 INT[], c43 FLOAT[], c44 TEXT[]);
+	CREATE OR REPLACE FUNCTION random_bytea(bytea_length integer)
+		RETURNS bytea AS $body$
+			SELECT decode(string_agg(lpad(to_hex(width_bucket(random(), 0, 1, 256)-1),2,'0') ,''), 'hex')
+			FROM generate_series(1, $1);
+		$body$
+		LANGUAGE 'sql'
+		VOLATILE
+		SET search_path = 'pg_catalog';
+	`, srcTableName))
+	s.NoError(err)
+
+	connectionGen := e2e.FlowConnectionGenerationConfig{
+		FlowJobName:      s.attachSuffix("test_types_bq"),
+		TableNameMapping: map[string]string{srcTableName: dstTableName},
+		PostgresPort:     e2e.PostgresPort,
+		Destination:      s.bqHelper.Peer,
+	}
+
+	flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs()
+	s.NoError(err)
+
+	limits := peerflow.PeerFlowLimits{
+		TotalSyncFlows: 1,
+		MaxBatchSize:   100,
+	}
+
+	// in a separate goroutine, wait for PeerFlowStatusQuery to finish setup
+	// and execute an insert exercising all supported types
+	go func() {
+		e2e.SetupPeerFlowStatusQuery(env, connectionGen)
+		/* test inserting various types */
+		_, err = s.pool.Exec(context.Background(), fmt.Sprintf(`
+		INSERT INTO %s SELECT 2,2,b'1',b'101',
+		true,random_bytea(32),'s','test','1.1.10.2'::cidr,
+		CURRENT_DATE,1.23,1.234,'192.168.1.5'::inet,1,
+		'5 years 2 months 29 days 1 minute 2 seconds 200 milliseconds 20000 microseconds'::interval,
+		'{"sai":1}'::json,'{"sai":1}'::jsonb,'08:00:2b:01:02:03'::macaddr,
+		1.2,1.23,4::oid,1.23,1,1,1,'test',now(),now(),now()::time,now()::timetz,
+		'fat & rat'::tsquery,'a fat cat sat on a mat and ate a fat rat'::tsvector,
+		txid_current_snapshot(),
+		'66073c38-b8df-4bdb-bbca-1c97596b8940'::uuid,xmlcomment('hello'),
+		ARRAY[10299301,2579827],
+		ARRAY[0.0003, 8902.0092],
+		ARRAY['hello','bye'];
+		`, srcTableName))
+		s.NoError(err)
+		fmt.Println("Executed an insert with all types")
+	}()
+
+	env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil)
+
+	// Verify workflow completes without error
+	s.True(env.IsWorkflowCompleted())
+	err = env.GetWorkflowError()
+
+	// allow only continue as new error
+	s.Error(err)
+	s.Contains(err.Error(), "continue as new")
+
+	noNulls, err := s.bqHelper.CheckNull(dstTableName, []string{"c41", "c1", "c2", "c3", "c4",
+		"c6", "c39", "c40", "id", "c9", "c11", "c12", "c13", "c14", "c15", "c16", "c17", "c18",
+		"c21", "c22", "c23", "c24", "c28", "c29", "c30", "c31", "c33", "c34", "c35", "c36",
+		"c37", "c38", "c7", "c8", "c32", "c42", "c43", "c44"})
+	if err != nil {
+		fmt.Printf("error: %v\n", err)
+	}
+	// Make sure that there are no nulls
+	s.True(noNulls)
+
+	env.AssertExpectations(s.T())
+}
+
+func (s *PeerFlowE2ETestSuiteBQ) Test_Types_Avro_BQ() {
+	env := s.NewTestWorkflowEnvironment()
+	e2e.RegisterWorkflowsAndActivities(env)
+
+	srcTableName := s.attachSchemaSuffix("test_types_avro_bq")
+	dstTableName := "test_types_avro_bq"
+
+	_, err := s.pool.Exec(context.Background(), fmt.Sprintf(`
+	CREATE TABLE IF NOT EXISTS %s (id serial PRIMARY KEY,c1 BIGINT,c2 BIT,c3 VARBIT,c4 BOOLEAN,
+	c6 BYTEA,c7 CHARACTER,c8 varchar,c9 CIDR,c11 DATE,c12 FLOAT,c13 DOUBLE PRECISION,
+	c14 INET,c15 INTEGER,c16 INTERVAL,c17 JSON,c18 JSONB,c21 MACADDR,c22 MONEY,
+	c23 NUMERIC,c24 OID,c28 REAL,c29 SMALLINT,c30 SMALLSERIAL,c31 SERIAL,c32 TEXT,
+	c33 TIMESTAMP,c34 TIMESTAMPTZ,c35 TIME, c36 TIMETZ,c37 TSQUERY,c38 TSVECTOR,
+	c39 TXID_SNAPSHOT,c40 UUID,c41 XML, c42 INT[], c43 FLOAT[], c44 TEXT[]);
+	CREATE OR REPLACE FUNCTION random_bytea(bytea_length integer)
+		RETURNS bytea AS $body$
+			SELECT decode(string_agg(lpad(to_hex(width_bucket(random(), 0, 1, 256)-1),2,'0') ,''), 'hex')
+			FROM generate_series(1, $1);
+		$body$
+		LANGUAGE 'sql'
+		VOLATILE
+		SET search_path = 'pg_catalog';
+	`, srcTableName))
+	s.NoError(err)
+
+	connectionGen := e2e.FlowConnectionGenerationConfig{
+		FlowJobName:      s.attachSuffix("test_types_avro_bq"),
+		TableNameMapping: map[string]string{srcTableName: dstTableName},
+		PostgresPort:     e2e.PostgresPort,
+		Destination:      s.bqHelper.Peer,
+		CDCSyncMode:      protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO,
+		CdcStagingPath:   "peerdb_staging",
+	}
+
+	flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs()
+	s.NoError(err)
+
+	limits := peerflow.PeerFlowLimits{
+		TotalSyncFlows: 1,
+		MaxBatchSize:   100,
+	}
+
+	// in a separate goroutine, wait for PeerFlowStatusQuery to finish setup
+	// and execute an insert exercising all supported types
+	go func() {
+		e2e.SetupPeerFlowStatusQuery(env, connectionGen)
+		/* test inserting various types */
+		_, err = s.pool.Exec(context.Background(), fmt.Sprintf(`
+		INSERT INTO %s SELECT 2,2,b'1',b'101',
+		true,random_bytea(32),'s','test','1.1.10.2'::cidr,
+		CURRENT_DATE,1.23,1.234,'192.168.1.5'::inet,1,
+		'5 years 2 months 29 days 1 minute 2 seconds 200 milliseconds 20000 microseconds'::interval,
+		'{"sai":1}'::json,'{"sai":1}'::jsonb,'08:00:2b:01:02:03'::macaddr,
+		1.2,1.23,4::oid,1.23,1,1,1,'test',now(),now(),now()::time,now()::timetz,
+		'fat & rat'::tsquery,'a fat cat sat on a mat and ate a fat rat'::tsvector,
+		txid_current_snapshot(),
+		'66073c38-b8df-4bdb-bbca-1c97596b8940'::uuid,xmlcomment('hello'),
+		ARRAY[9301,239827],
+		ARRAY[0.0003, 1039.0034],
+		ARRAY['hello','bye'];
+		`, srcTableName))
+		s.NoError(err)
+		fmt.Println("Executed an insert with all types")
+	}()
+
+	env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil)
+
+	// Verify workflow completes without error
+	s.True(env.IsWorkflowCompleted())
+	err = env.GetWorkflowError()
+
+	// allow only continue as new error
+	s.Error(err)
+	s.Contains(err.Error(), "continue as new")
+
+	noNulls, err := s.bqHelper.CheckNull(dstTableName, []string{"c41", "c1", "c2", "c3", "c4",
+		"c6", "c39", "c40", "id", "c9", "c11", "c12", "c13", "c14", "c15", "c16", "c17", "c18",
+		"c21", "c22", "c23", "c24", "c28", "c29", "c30", "c31", "c33", "c34", "c35", "c36",
+		"c37", "c38", "c7", "c8", "c32", "c42", "c43", "c44"})
+	if err != nil {
+		fmt.Printf("error: %v\n", err)
+	}
+	// Make sure that there are no nulls
+	s.True(noNulls)
+
+	env.AssertExpectations(s.T())
+}
+
+func (s *PeerFlowE2ETestSuiteBQ) Test_Simple_Flow_BQ_Avro_CDC() {
+	env := s.NewTestWorkflowEnvironment()
+	e2e.RegisterWorkflowsAndActivities(env)
+
+	srcTableName := s.attachSchemaSuffix("test_simple_flow_bq_avro_cdc")
+	dstTableName := "test_simple_flow_bq_avro_cdc"
+
+	_, err := s.pool.Exec(context.Background(), fmt.Sprintf(`
+		CREATE TABLE IF NOT EXISTS %s (
+			id SERIAL PRIMARY KEY,
+			key TEXT NOT NULL,
+			value TEXT NOT NULL
+		);
+	`, srcTableName))
+	s.NoError(err)
+	connectionGen := e2e.FlowConnectionGenerationConfig{
+		FlowJobName:      s.attachSuffix("test_simple_flow_bq_avro_cdc"),
+		TableNameMapping: map[string]string{srcTableName: dstTableName},
+		PostgresPort:     e2e.PostgresPort,
+		Destination:      s.bqHelper.Peer,
+		CDCSyncMode:      protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO,
+		CdcStagingPath:   "peerdb_staging",
+	}
+
+	flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs()
+	s.NoError(err)
+
+	limits := peerflow.PeerFlowLimits{
+		TotalSyncFlows: 2,
+		MaxBatchSize:   100,
+	}
+
+	go func() {
+		e2e.SetupPeerFlowStatusQuery(env, connectionGen)
+		for i := 0; i < 10; i++ {
+			testKey := fmt.Sprintf("test_key_%d", i)
+			testValue := fmt.Sprintf("test_value_%d", i)
+			_, err = s.pool.Exec(context.Background(), fmt.Sprintf(`
+			INSERT INTO %s (key, value) VALUES ($1, $2)
+			`, srcTableName), testKey, testValue)
+			s.NoError(err)
+		}
+		fmt.Println("Inserted 10 rows into the source table")
+	}()
+
+	env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil)
+
+	// Verify workflow completes without error
+	s.True(env.IsWorkflowCompleted())
+	err = env.GetWorkflowError()
+
+	// allow only continue as new error
+	s.Error(err)
+	s.Contains(err.Error(), "continue as new")
+
+	count, err := s.bqHelper.CountRows(dstTableName)
+	s.NoError(err)
+	s.Equal(10, count)
+
+	// TODO: verify that the data is correctly synced to the destination table
+	// on the bigquery side
+
+	env.AssertExpectations(s.T())
+}
+
+func (s *PeerFlowE2ETestSuiteBQ) Test_Multi_Table_BQ() {
+	env := s.NewTestWorkflowEnvironment()
+	e2e.RegisterWorkflowsAndActivities(env)
+
+	srcTable1Name := s.attachSchemaSuffix("test1_bq")
+	dstTable1Name := "test1_bq"
+	srcTable2Name := s.attachSchemaSuffix("test2_bq")
+	dstTable2Name := "test2_bq"
+
+	_, err := s.pool.Exec(context.Background(), fmt.Sprintf(`
+		CREATE TABLE %s (id serial primary key, c1 int, c2 text);
+		CREATE TABLE %s(id serial primary key, c1 int, c2 text);
+	`, srcTable1Name, srcTable2Name))
+	s.NoError(err)
+
+	connectionGen := e2e.FlowConnectionGenerationConfig{
+		FlowJobName:      s.attachSuffix("test_multi_table_bq"),
+		TableNameMapping: map[string]string{srcTable1Name: dstTable1Name, srcTable2Name: dstTable2Name},
+		PostgresPort:     e2e.PostgresPort,
+		Destination:      s.bqHelper.Peer,
+	}
+
+	flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs()
+	s.NoError(err)
+
+	limits := peerflow.PeerFlowLimits{
+		TotalSyncFlows: 1,
+		MaxBatchSize:   100,
+	}
+
+	// in a separate goroutine, wait for PeerFlowStatusQuery to finish setup
+	// and insert a row into each of the two source tables
+	go func() {
+		e2e.SetupPeerFlowStatusQuery(env, connectionGen)
+		/* inserting across multiple tables */
+		_, err = s.pool.Exec(context.Background(), fmt.Sprintf(`
+		INSERT INTO %s (c1,c2) VALUES (1,'dummy_1');
+		INSERT INTO %s (c1,c2) VALUES (-1,'dummy_-1');
+		`, srcTable1Name, srcTable2Name))
+		s.NoError(err)
+		fmt.Println("Executed an insert on two tables")
+	}()
+
+	env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil)
+
+	// Verify workflow completes without error
+	require.True(s.T(), env.IsWorkflowCompleted())
+	err = env.GetWorkflowError()
+
+	count1, err := s.bqHelper.CountRows(dstTable1Name)
+	s.NoError(err)
+	count2, err := s.bqHelper.CountRows(dstTable2Name)
+	s.NoError(err)
+
+	s.Equal(1, count1)
+	s.Equal(1, count2)
+
+	env.AssertExpectations(s.T())
+}
diff --git a/flow/e2e/bigquery/qrep_flow_bq_test.go b/flow/e2e/bigquery/qrep_flow_bq_test.go
new file mode 100644
index 000000000..5e6374cc1
--- /dev/null
+++ b/flow/e2e/bigquery/qrep_flow_bq_test.go
@@ -0,0 +1,123 @@
+package e2e_bigquery
+
+import (
+	"context"
+	"fmt"
+
+	connpostgres "github.com/PeerDB-io/peer-flow/connectors/postgres"
+	"github.com/PeerDB-io/peer-flow/e2e"
+	"github.com/PeerDB-io/peer-flow/generated/protos"
+	"github.com/stretchr/testify/require"
+)
+
+func (s *PeerFlowE2ETestSuiteBQ) setupSourceTable(tableName string, rowCount int) {
+	err := e2e.CreateSourceTableQRep(s.pool, bigquerySuffix, tableName)
+	s.NoError(err)
+	err = e2e.PopulateSourceTable(s.pool, bigquerySuffix, tableName, rowCount)
+	s.NoError(err)
+}
+
+func (s *PeerFlowE2ETestSuiteBQ) setupBQDestinationTable(dstTable string) {
+	schema := e2e.GetOwnersSchema()
+	err := s.bqHelper.CreateTable(dstTable, schema)
+
+	// fail if table creation fails
+	require.NoError(s.T(), err)
+
+	fmt.Printf("created table on bigquery: %s.%s. %v\n", s.bqHelper.Config.DatasetId, dstTable, err)
+}
+
+func (s *PeerFlowE2ETestSuiteBQ) compareTableContentsBQ(tableName string, colsString string) {
+	// read rows from source table
+	pgQueryExecutor := connpostgres.NewQRepQueryExecutor(s.pool, context.Background(), "testflow", "testpart")
+	pgQueryExecutor.SetTestEnv(true)
+
+	pgRows, err := pgQueryExecutor.ExecuteAndProcessQuery(
+		fmt.Sprintf("SELECT %s FROM e2e_test_%s.%s ORDER BY id", colsString, bigquerySuffix, tableName),
+	)
+	s.NoError(err)
+
+	// read rows from destination table
+	qualifiedTableName := fmt.Sprintf("`%s.%s`", s.bqHelper.Config.DatasetId, tableName)
+	bqRows, err := s.bqHelper.ExecuteAndProcessQuery(
+		fmt.Sprintf("SELECT %s FROM %s ORDER BY id", colsString, qualifiedTableName),
+	)
+	s.NoError(err)
+
+	s.True(pgRows.Equals(bqRows), "rows from source and destination tables are not equal")
+}
+
+func (s *PeerFlowE2ETestSuiteBQ) Test_Complete_QRep_Flow_Avro() {
+	env := s.NewTestWorkflowEnvironment()
+	e2e.RegisterWorkflowsAndActivities(env)
+
+	numRows := 10
+
+	tblName := "test_qrep_flow_avro"
+	s.setupSourceTable(tblName, numRows)
+	s.setupBQDestinationTable(tblName)
+
+	query := fmt.Sprintf("SELECT * FROM e2e_test_%s.%s WHERE updated_at BETWEEN {{.start}} AND {{.end}}",
+		bigquerySuffix, tblName)
+
+	qrepConfig, err := e2e.CreateQRepWorkflowConfig("test_qrep_flow_avro",
+		fmt.Sprintf("e2e_test_%s.%s", bigquerySuffix, tblName),
+		tblName,
+		query,
+		protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO,
+		s.bqHelper.Peer,
+		"peerdb_staging")
+	s.NoError(err)
+	e2e.RunQrepFlowWorkflow(env, qrepConfig)
+
+	// Verify workflow completes without error
+	s.True(env.IsWorkflowCompleted())
+
+	// assert that the workflow completed without any error
+	err = env.GetWorkflowError()
+	s.NoError(err)
+
+	s.compareTableContentsBQ(tblName, "*")
+
+	env.AssertExpectations(s.T())
+}
+
+// NOTE: Disabled due to large JSON tests being added: https://github.com/PeerDB-io/peerdb/issues/309

+// Test_Complete_QRep_Flow tests a complete flow with data in the source table.
+// The test inserts 10 rows into the source table and verifies that the data is
+// correctly synced to the destination table; this runs a QRep flow.
+// func (s *E2EPeerFlowTestSuite) Test_Complete_QRep_Flow_Multi_Insert() { +// env := s.NewTestWorkflowEnvironment() +// registerWorkflowsAndActivities(env) + +// numRows := 10 + +// tblName := "test_qrep_flow_multi_insert" +// s.setupSourceTable(tblName, numRows) +// s.setupBQDestinationTable(tblName) + +// query := fmt.Sprintf("SELECT * FROM e2e_test.%s WHERE updated_at BETWEEN {{.start}} AND {{.end}}", tblName) + +// qrepConfig := s.createQRepWorkflowConfig("test_qrep_flow_mi", +// "e2e_test."+tblName, +// tblName, +// query, +// protos.QRepSyncMode_QREP_SYNC_MODE_MULTI_INSERT, +// s.bqHelper.Peer) +// runQrepFlowWorkflow(env, qrepConfig) + +// // Verify workflow completes without error +// s.True(env.IsWorkflowCompleted()) + +// // assert that error contains "invalid connection configs" +// err := env.GetWorkflowError() +// s.NoError(err) + +// count, err := s.bqHelper.CountRows(tblName) +// s.NoError(err) + +// s.Equal(numRows, count) + +// env.AssertExpectations(s.T()) +// } diff --git a/flow/e2e/congen.go b/flow/e2e/congen.go index f82912947..14725d789 100644 --- a/flow/e2e/congen.go +++ b/flow/e2e/congen.go @@ -1,9 +1,108 @@ package e2e import ( + "context" + "fmt" + + "github.com/PeerDB-io/peer-flow/connectors/utils" "github.com/PeerDB-io/peer-flow/generated/protos" + "github.com/jackc/pgx/v5/pgxpool" +) + +const ( + postgresHost = "localhost" + postgresUser = "postgres" + postgresPassword = "postgres" + postgresDatabase = "postgres" + PostgresPort = 7132 ) +func GetTestPostgresConf() *protos.PostgresConfig { + return &protos.PostgresConfig{ + Host: postgresHost, + Port: uint32(PostgresPort), + User: postgresUser, + Password: postgresPassword, + Database: postgresDatabase, + } +} + +func cleanPostgres(pool *pgxpool.Pool, suffix string) error { + // drop the e2e_test schema with the given suffix if it exists + _, err := pool.Exec(context.Background(), fmt.Sprintf("DROP SCHEMA IF EXISTS e2e_test_%s CASCADE", suffix)) + if err != nil { + return fmt.Errorf("failed to drop e2e_test schema: %w", err) + } + + // drop all open slots with the given suffix + _, err = pool.Exec( + context.Background(), + "SELECT pg_drop_replication_slot(slot_name) FROM pg_replication_slots WHERE slot_name LIKE $1", + fmt.Sprintf("%%_%s", suffix), + ) + if err != nil { + return fmt.Errorf("failed to drop replication slots: %w", err) + } + + // list all publications from pg_publication table + rows, err := pool.Query(context.Background(), + "SELECT pubname FROM pg_publication WHERE pubname LIKE $1", + fmt.Sprintf("%%_%s", suffix), + ) + if err != nil { + return fmt.Errorf("failed to list publications: %w", err) + } + + // drop all publications with the given suffix + for rows.Next() { + var pubName string + err = rows.Scan(&pubName) + if err != nil { + return fmt.Errorf("failed to scan publication name: %w", err) + } + + _, err = pool.Exec(context.Background(), fmt.Sprintf("DROP PUBLICATION %s", pubName)) + if err != nil { + return fmt.Errorf("failed to drop publication %s: %w", pubName, err) + } + } + + return nil +} + +// setupPostgres sets up the postgres connection pool. 
+func SetupPostgres(suffix string) (*pgxpool.Pool, error) { + pool, err := pgxpool.New(context.Background(), utils.GetPGConnectionString(GetTestPostgresConf())) + if err != nil { + return nil, fmt.Errorf("failed to create postgres connection pool: %w", err) + } + + err = cleanPostgres(pool, suffix) + if err != nil { + return nil, err + } + + // create an e2e_test schema + _, err = pool.Exec(context.Background(), fmt.Sprintf("CREATE SCHEMA e2e_test_%s", suffix)) + if err != nil { + return nil, fmt.Errorf("failed to create e2e_test schema: %w", err) + } + + return pool, nil +} + +func TearDownPostgres(pool *pgxpool.Pool, suffix string) error { + // drop the e2e_test schema + if pool != nil { + err := cleanPostgres(pool, suffix) + if err != nil { + return err + } + pool.Close() + } + return nil +} + // GeneratePostgresPeer generates a postgres peer config for testing. func GeneratePostgresPeer(postgresPort int) *protos.Peer { ret := &protos.Peer{} diff --git a/flow/e2e/eventhub_helper.go b/flow/e2e/eventhub/eventhub_helper.go similarity index 99% rename from flow/e2e/eventhub_helper.go rename to flow/e2e/eventhub/eventhub_helper.go index 2457361cb..e488da428 100644 --- a/flow/e2e/eventhub_helper.go +++ b/flow/e2e/eventhub/eventhub_helper.go @@ -1,4 +1,4 @@ -package e2e +package e2e_eventhub import ( "context" diff --git a/flow/e2e/peer_flow_eh_test.go b/flow/e2e/eventhub/peer_flow_eh_test.go similarity index 57% rename from flow/e2e/peer_flow_eh_test.go rename to flow/e2e/eventhub/peer_flow_eh_test.go index 23a85abde..687f17fac 100644 --- a/flow/e2e/peer_flow_eh_test.go +++ b/flow/e2e/eventhub/peer_flow_eh_test.go @@ -1,23 +1,44 @@ -package e2e +package e2e_eventhub import ( "context" "fmt" "os" + "testing" "time" + "github.com/PeerDB-io/peer-flow/e2e" util "github.com/PeerDB-io/peer-flow/utils" peerflow "github.com/PeerDB-io/peer-flow/workflows" + "github.com/jackc/pgx/v5/pgxpool" + "github.com/joho/godotenv" + log "github.com/sirupsen/logrus" "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + "go.temporal.io/sdk/testsuite" ) -func (s *E2EPeerFlowTestSuite) setupEventHub() error { +const eventhubSuffix = "eventhub" + +type PeerFlowE2ETestSuiteEH struct { + suite.Suite + testsuite.WorkflowTestSuite + + pool *pgxpool.Pool + ehHelper *EventHubTestHelper +} + +func TestPeerFlowE2ETestSuiteEH(t *testing.T) { + suite.Run(t, new(PeerFlowE2ETestSuiteEH)) +} + +func (s *PeerFlowE2ETestSuiteEH) setupEventHub() error { enableEHT := os.Getenv("ENABLE_EVENT_HUB_TESTS") if enableEHT == "" { return nil } - pgConf := GetTestPostgresConf() + pgConf := e2e.GetTestPostgresConf() helper, err := NewEventHubTestHelper(pgConf) if err != nil { return err @@ -27,13 +48,50 @@ func (s *E2EPeerFlowTestSuite) setupEventHub() error { return nil } -func (s *E2EPeerFlowTestSuite) Test_Complete_Simple_Flow_EH() { +func (s *PeerFlowE2ETestSuiteEH) SetupSuite() { + err := godotenv.Load() + if err != nil { + // it's okay if the .env file is not present + // we will use the default values + log.Infof("Unable to load .env file, using default values from env") + } + + log.SetReportCaller(true) + + pool, err := e2e.SetupPostgres(eventhubSuffix) + if err != nil { + s.Fail("failed to setup postgres", err) + } + s.pool = pool + + err = s.setupEventHub() + if err != nil { + s.Fail("failed to setup eventhub", err) + } +} + +// Implement TearDownAllSuite interface to tear down the test suite +func (s *PeerFlowE2ETestSuiteEH) TearDownSuite() { + err := e2e.TearDownPostgres(s.pool, eventhubSuffix) + if err != 
nil { + s.Fail("failed to drop Postgres schema", err) + } + + if s.ehHelper != nil { + err = s.ehHelper.CleanUp() + if err != nil { + s.Fail("failed to clean up eventhub", err) + } + } +} + +func (s *PeerFlowE2ETestSuiteEH) Test_Complete_Simple_Flow_EH() { if s.ehHelper == nil { s.T().Skip("Skipping EventHub test") } env := s.NewTestWorkflowEnvironment() - registerWorkflowsAndActivities(env) + e2e.RegisterWorkflowsAndActivities(env) ru, err := util.RandomUInt64() s.NoError(err) @@ -49,10 +107,10 @@ func (s *E2EPeerFlowTestSuite) Test_Complete_Simple_Flow_EH() { `) s.NoError(err) - connectionGen := FlowConnectionGenerationConfig{ + connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: jobName, TableNameMapping: map[string]string{schemaQualifiedName: jobName}, - PostgresPort: postgresPort, + PostgresPort: e2e.PostgresPort, Destination: s.ehHelper.GetPeer(), } @@ -67,7 +125,7 @@ func (s *E2EPeerFlowTestSuite) Test_Complete_Simple_Flow_EH() { // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup // and then insert 10 rows into the source table go func() { - s.SetupPeerFlowStatusQuery(env, connectionGen) + e2e.SetupPeerFlowStatusQuery(env, connectionGen) // insert 10 rows into the source table for i := 0; i < 10; i++ { testKey := fmt.Sprintf("test_key_%d", i) diff --git a/flow/e2e/peer_flow_s3_test.go b/flow/e2e/peer_flow_s3_test.go deleted file mode 100644 index 7cc6e7e38..000000000 --- a/flow/e2e/peer_flow_s3_test.go +++ /dev/null @@ -1,153 +0,0 @@ -package e2e - -import ( - "context" - "fmt" - "time" - - "github.com/PeerDB-io/peer-flow/generated/protos" - util "github.com/PeerDB-io/peer-flow/utils" - "github.com/stretchr/testify/require" -) - -func (s *E2EPeerFlowTestSuite) setupS3() error { - helper, err := NewS3TestHelper() - if err != nil { - return err - } - - s.s3Helper = helper - return nil -} - -func (s *E2EPeerFlowTestSuite) Test_Complete_QRep_Flow_S3() { - if s.s3Helper == nil { - s.T().Skip("Skipping S3 test") - } - - env := s.NewTestWorkflowEnvironment() - registerWorkflowsAndActivities(env) - - ru, err := util.RandomUInt64() - s.NoError(err) - - jobName := fmt.Sprintf("test_complete_flow_s3_%d", ru) - schemaQualifiedName := fmt.Sprintf("e2e_test.%s", jobName) - _, err = s.pool.Exec(context.Background(), ` - CREATE TABLE `+schemaQualifiedName+` ( - id SERIAL PRIMARY KEY, - key TEXT NOT NULL, - value TEXT NOT NULL - ); - `) - s.NoError(err) - - tblName := "test_qrep_flow_s3_1" - s.setupSourceTable(tblName, 10) - query := fmt.Sprintf("SELECT * FROM e2e_test.%s WHERE updated_at >= {{.start}} AND updated_at < {{.end}}", tblName) - qrepConfig := s.createQRepWorkflowConfig( - jobName, - "e2e_test."+tblName, - "e2e_dest_1", - query, - protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, - s.s3Helper.GetPeer(), - "stage", - ) - qrepConfig.StagingPath = s.s3Helper.s3Config.Url - - runQrepFlowWorkflow(env, qrepConfig) - - go func() { - // insert 10 rows into the source table - for i := 0; i < 10; i++ { - testKey := fmt.Sprintf("test_key_%d", i) - testValue := fmt.Sprintf("test_value_%d", i) - _, err = s.pool.Exec(context.Background(), ` - INSERT INTO `+schemaQualifiedName+` (key, value) VALUES ($1, $2) - `, testKey, testValue) - s.NoError(err) - } - fmt.Println("Inserted 10 rows into the source table") - }() - - // Verify workflow completes without error - s.True(env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - s.NoError(err) - - // Verify destination has 1 file - // make context with timeout - ctx, cancel := context.WithTimeout(context.Background(), 
10*time.Second) - defer cancel() - - files, err := s.s3Helper.ListAllFiles(ctx, jobName) - - require.NoError(s.T(), err) - - require.Equal(s.T(), 1, len(files)) - - env.AssertExpectations(s.T()) -} - -func (s *E2EPeerFlowTestSuite) Test_Complete_QRep_Flow_S3_CTID() { - if s.s3Helper == nil { - s.T().Skip("Skipping S3 test") - } - - env := s.NewTestWorkflowEnvironment() - registerWorkflowsAndActivities(env) - - ru, err := util.RandomUInt64() - s.NoError(err) - - jobName := fmt.Sprintf("test_complete_flow_s3_ctid_%d", ru) - schemaQualifiedName := fmt.Sprintf("e2e_test.%s", jobName) - _, err = s.pool.Exec(context.Background(), ` - CREATE TABLE `+schemaQualifiedName+` ( - id SERIAL PRIMARY KEY, - key TEXT NOT NULL, - value TEXT NOT NULL - ); - `) - s.NoError(err) - - tblName := "test_qrep_flow_s3_ctid" - s.setupSourceTable(tblName, 20000) - query := fmt.Sprintf("SELECT * FROM e2e_test.%s WHERE ctid BETWEEN {{.start}} AND {{.end}}", tblName) - qrepConfig := s.createQRepWorkflowConfig( - jobName, - "e2e_test."+tblName, - "e2e_dest_ctid", - query, - protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, - s.s3Helper.GetPeer(), - "stage", - ) - qrepConfig.StagingPath = s.s3Helper.s3Config.Url - qrepConfig.NumRowsPerPartition = 2000 - qrepConfig.InitialCopyOnly = true - qrepConfig.WatermarkColumn = "ctid" - - runQrepFlowWorkflow(env, qrepConfig) - - // Verify workflow completes without error - s.True(env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - s.NoError(err) - - // Verify destination has 1 file - // make context with timeout - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - - files, err := s.s3Helper.ListAllFiles(ctx, jobName) - - require.NoError(s.T(), err) - - require.Equal(s.T(), 10, len(files)) - - env.AssertExpectations(s.T()) -} diff --git a/flow/e2e/peer_flow_test.go b/flow/e2e/peer_flow_test.go deleted file mode 100644 index 4df215cd3..000000000 --- a/flow/e2e/peer_flow_test.go +++ /dev/null @@ -1,1809 +0,0 @@ -package e2e - -import ( - "context" - "fmt" - "math/rand" - "os" - "testing" - "time" - - "github.com/PeerDB-io/peer-flow/activities" - "github.com/PeerDB-io/peer-flow/connectors/utils" - "github.com/PeerDB-io/peer-flow/generated/protos" - util "github.com/PeerDB-io/peer-flow/utils" - peerflow "github.com/PeerDB-io/peer-flow/workflows" - "github.com/jackc/pgx/v5/pgxpool" - "github.com/joho/godotenv" - log "github.com/sirupsen/logrus" - "github.com/stretchr/testify/require" - "github.com/stretchr/testify/suite" - "go.temporal.io/sdk/testsuite" -) - -type E2EPeerFlowTestSuite struct { - suite.Suite - testsuite.WorkflowTestSuite - - pgConnStr string - pool *pgxpool.Pool - - bqHelper *BigQueryTestHelper - sfHelper *SnowflakeTestHelper - ehHelper *EventHubTestHelper - s3Helper *S3TestHelper - sqlsHelper *SQLServerHelper -} - -func TestE2EPeerFlowTestSuite(t *testing.T) { - suite.Run(t, new(E2EPeerFlowTestSuite)) -} - -const ( - postgresHost = "localhost" - postgresUser = "postgres" - postgresPassword = "postgres" - postgresDatabase = "postgres" - postgresPort = 7132 -) - -func GetTestPostgresConf() *protos.PostgresConfig { - return &protos.PostgresConfig{ - Host: postgresHost, - Port: uint32(postgresPort), - User: postgresUser, - Password: postgresPassword, - Database: postgresDatabase, - } -} - -// setupPostgres sets up the postgres connection pool. 
-func (s *E2EPeerFlowTestSuite) setupPostgres() error { - s.pgConnStr = utils.GetPGConnectionString(GetTestPostgresConf()) - pool, err := pgxpool.New(context.Background(), s.pgConnStr) - if err != nil { - return fmt.Errorf("failed to create postgres connection pool: %w", err) - } - - s.pool = pool - - // drop the e2e_test schema if it exists - _, err = s.pool.Exec(context.Background(), "DROP SCHEMA IF EXISTS e2e_test CASCADE") - if err != nil { - return fmt.Errorf("failed to drop e2e_test schema: %w", err) - } - - // create an e2e_test schema - _, err = s.pool.Exec(context.Background(), "CREATE SCHEMA e2e_test") - if err != nil { - return fmt.Errorf("failed to create e2e_test schema: %w", err) - } - - // drop all open slots - _, err = s.pool.Exec( - context.Background(), - "SELECT pg_drop_replication_slot(slot_name) FROM pg_replication_slots", - ) - if err != nil { - return fmt.Errorf("failed to drop replication slots: %w", err) - } - - // list all publications from pg_publication table - rows, err := s.pool.Query(context.Background(), "SELECT pubname FROM pg_publication") - if err != nil { - return fmt.Errorf("failed to list publications: %w", err) - } - - // drop all publications - for rows.Next() { - var pubName string - err = rows.Scan(&pubName) - if err != nil { - return fmt.Errorf("failed to scan publication name: %w", err) - } - - _, err = s.pool.Exec(context.Background(), fmt.Sprintf("DROP PUBLICATION %s", pubName)) - if err != nil { - return fmt.Errorf("failed to drop publication %s: %w", pubName, err) - } - } - - return nil -} - -// setupBigQuery sets up the bigquery connection. -func (s *E2EPeerFlowTestSuite) setupBigQuery() error { - bqHelper, err := NewBigQueryTestHelper() - if err != nil { - return fmt.Errorf("failed to create bigquery helper: %w", err) - } - - err = bqHelper.RecreateDataset() - if err != nil { - return fmt.Errorf("failed to recreate bigquery dataset: %w", err) - } - - s.bqHelper = bqHelper - return nil -} - -// setupSnowflake sets up the snowflake connection. 
-func (s *E2EPeerFlowTestSuite) setupSnowflake() error { - runID, err := util.RandomUInt64() - if err != nil { - return fmt.Errorf("failed to generate random uint64: %w", err) - } - - testSchemaName := fmt.Sprintf("e2e_test_%d", runID) - - sfHelper, err := NewSnowflakeTestHelper(testSchemaName) - if err != nil { - return fmt.Errorf("failed to create snowflake helper: %w", err) - } - - err = sfHelper.RecreateSchema() - if err != nil { - return fmt.Errorf("failed to recreate snowflake schema: %w", err) - } - s.sfHelper = sfHelper - - // for every test, drop the _PEERDB_INTERNAL schema - err = s.sfHelper.client.DropSchema("_PEERDB_INTERNAL") - require.NoError(s.T(), err) - - return nil -} - -// setup sql server connection -func (s *E2EPeerFlowTestSuite) setupSQLServer() { - env := os.Getenv("ENABLE_SQLSERVER_TESTS") - if env != "true" { - s.sqlsHelper = nil - return - } - - sqlsHelper, err := NewSQLServerHelper("test_sqlserver_peer") - require.NoError(s.T(), err) - s.sqlsHelper = sqlsHelper -} - -// Implement SetupAllSuite interface to setup the test suite -func (s *E2EPeerFlowTestSuite) SetupSuite() { - err := godotenv.Load() - if err != nil { - // it's okay if the .env file is not present - // we will use the default values - log.Infof("Unable to load .env file, using default values from env") - } - - log.SetReportCaller(true) - - // seed the random number generator with current time - rand.Seed(time.Now().UnixNano()) - - err = s.setupPostgres() - if err != nil { - s.Fail("failed to setup postgres", err) - } - - err = s.setupBigQuery() - if err != nil { - s.Fail("failed to setup bigquery", err) - } - - err = s.setupSnowflake() - if err != nil { - s.Fail("failed to setup snowflake", err) - } - - err = s.setupEventHub() - if err != nil { - s.Fail("failed to setup eventhub", err) - } - - err = s.setupS3() - if err != nil { - s.Fail("failed to setup s3", err) - } - - s.setupSQLServer() -} - -// Implement TearDownAllSuite interface to tear down the test suite -func (s *E2EPeerFlowTestSuite) TearDownSuite() { - // drop the e2e_test schema - _, err := s.pool.Exec(context.Background(), "DROP SCHEMA e2e_test CASCADE") - if err != nil { - s.Fail("failed to drop e2e_test schema", err) - } - - if s.pool != nil { - s.pool.Close() - } - - err = s.bqHelper.DropDataset() - if err != nil { - s.Fail("failed to drop bigquery dataset", err) - } - - if s.sfHelper != nil { - err = s.sfHelper.DropSchema() - if err != nil { - s.Fail("failed to drop snowflake schema", err) - } - } else { - s.Fail("snowflake helper is nil, unable to drop snowflake schema") - } - - if s.ehHelper != nil { - err = s.ehHelper.CleanUp() - if err != nil { - s.Fail("failed to clean up eventhub", err) - } - } - - if s.s3Helper != nil { - err = s.s3Helper.CleanUp() - if err != nil { - s.Fail("failed to clean up s3", err) - } - } - - if s.sqlsHelper != nil { - err = s.sqlsHelper.CleanUp() - if err != nil { - s.Fail("failed to clean up sqlserver", err) - } - } -} - -func (s *E2EPeerFlowTestSuite) TearDownTest() { - // clear all replication slots - _, err := s.pool.Exec( - context.Background(), - "SELECT pg_drop_replication_slot(slot_name) FROM pg_replication_slots", - ) - if err != nil { - s.Fail("failed to drop replication slots", err) - } -} - -func registerWorkflowsAndActivities(env *testsuite.TestWorkflowEnvironment) { - // set a 300 second timeout for the workflow to execute a few runs. 
- env.SetTestTimeout(300 * time.Second) - - env.RegisterWorkflow(peerflow.PeerFlowWorkflow) - env.RegisterWorkflow(peerflow.PeerFlowWorkflowWithConfig) - env.RegisterWorkflow(peerflow.SyncFlowWorkflow) - env.RegisterWorkflow(peerflow.SetupFlowWorkflow) - env.RegisterWorkflow(peerflow.SnapshotFlowWorkflow) - env.RegisterWorkflow(peerflow.NormalizeFlowWorkflow) - env.RegisterWorkflow(peerflow.QRepFlowWorkflow) - env.RegisterWorkflow(peerflow.QRepPartitionWorkflow) - env.RegisterActivity(&activities.FetchConfigActivity{}) - env.RegisterActivity(&activities.FlowableActivity{}) - env.RegisterActivity(&activities.SnapshotActivity{}) -} - -func (s *E2EPeerFlowTestSuite) Test_Invalid_Connection_Config() { - env := s.NewTestWorkflowEnvironment() - registerWorkflowsAndActivities(env) - - // TODO (kaushikiska): ensure flow name can only be alpha numeric and underscores. - limits := peerflow.PeerFlowLimits{ - TotalSyncFlows: 1, - MaxBatchSize: 1, - } - - env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, nil, &limits, nil) - - // Verify workflow completes - s.True(env.IsWorkflowCompleted()) - err := env.GetWorkflowError() - - // assert that error contains "invalid connection configs" - s.Error(err) - s.Contains(err.Error(), "invalid connection configs") - - env.AssertExpectations(s.T()) -} - -func (s *E2EPeerFlowTestSuite) Test_Complete_Flow_No_Data() { - env := s.NewTestWorkflowEnvironment() - registerWorkflowsAndActivities(env) - - _, err := s.pool.Exec(context.Background(), ` - CREATE TABLE e2e_test.test ( - id SERIAL PRIMARY KEY, - key TEXT NOT NULL, - value VARCHAR(255) NOT NULL - ); - `) - s.NoError(err) - - connectionGen := FlowConnectionGenerationConfig{ - FlowJobName: "test_complete_flow_no_data", - TableNameMapping: map[string]string{"e2e_test.test": "test"}, - PostgresPort: postgresPort, - Destination: s.bqHelper.Peer, - } - - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - s.NoError(err) - - limits := peerflow.PeerFlowLimits{ - TotalSyncFlows: 1, - MaxBatchSize: 1, - } - - env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - - // Verify workflow completes without error - s.True(env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - // allow only continue as new error - s.Error(err) - s.Contains(err.Error(), "continue as new") - - env.AssertExpectations(s.T()) -} - -func (s *E2EPeerFlowTestSuite) Test_Char_ColType_Error() { - env := s.NewTestWorkflowEnvironment() - registerWorkflowsAndActivities(env) - - _, err := s.pool.Exec(context.Background(), ` - CREATE TABLE e2e_test.test_char_table ( - id SERIAL PRIMARY KEY, - key TEXT NOT NULL, - value CHAR(255) NOT NULL - ); - `) - s.NoError(err) - - connectionGen := FlowConnectionGenerationConfig{ - FlowJobName: "test_char_table", - TableNameMapping: map[string]string{"e2e_test.test_char_table": "test"}, - PostgresPort: postgresPort, - Destination: s.bqHelper.Peer, - } - - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - s.NoError(err) - - limits := peerflow.PeerFlowLimits{ - TotalSyncFlows: 1, - MaxBatchSize: 1, - } - - env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - - // Verify workflow completes without error - s.True(env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - // allow only continue as new error - s.Error(err) - s.Contains(err.Error(), "continue as new") - - env.AssertExpectations(s.T()) -} - -// Test_Complete_Simple_Flow_BQ tests a complete flow with data in the source table. 
-// The test inserts 10 rows into the source table and verifies that the data is -// correctly synced to the destination table after sync flow completes. -func (s *E2EPeerFlowTestSuite) Test_Complete_Simple_Flow_BQ() { - env := s.NewTestWorkflowEnvironment() - registerWorkflowsAndActivities(env) - - _, err := s.pool.Exec(context.Background(), ` - CREATE TABLE e2e_test.test_simple_flow_bq ( - id SERIAL PRIMARY KEY, - key TEXT NOT NULL, - value TEXT NOT NULL - ); - `) - s.NoError(err) - - connectionGen := FlowConnectionGenerationConfig{ - FlowJobName: "test_complete_single_col_flow_bq", - TableNameMapping: map[string]string{"e2e_test.test_simple_flow_bq": "test_simple_flow_bq"}, - PostgresPort: postgresPort, - Destination: s.bqHelper.Peer, - } - - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - s.NoError(err) - - limits := peerflow.PeerFlowLimits{ - TotalSyncFlows: 2, - MaxBatchSize: 100, - } - - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup - // and then insert 10 rows into the source table - go func() { - s.SetupPeerFlowStatusQuery(env, connectionGen) - // insert 10 rows into the source table - for i := 0; i < 10; i++ { - testKey := fmt.Sprintf("test_key_%d", i) - testValue := fmt.Sprintf("test_value_%d", i) - _, err = s.pool.Exec(context.Background(), ` - INSERT INTO e2e_test.test_simple_flow_bq (key, value) VALUES ($1, $2) - `, testKey, testValue) - s.NoError(err) - } - fmt.Println("Inserted 10 rows into the source table") - }() - - env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - - // Verify workflow completes without error - s.True(env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - // allow only continue as new error - s.Error(err) - s.Contains(err.Error(), "continue as new") - - // TODO: verify that the data is correctly synced to the destination table - // on the bigquery side - - env.AssertExpectations(s.T()) -} - -func (s *E2EPeerFlowTestSuite) Test_Toast_BQ() { - env := s.NewTestWorkflowEnvironment() - registerWorkflowsAndActivities(env) - - _, err := s.pool.Exec(context.Background(), ` - - CREATE TABLE e2e_test.test_toast_bq_1 ( - id SERIAL PRIMARY KEY, - t1 text, - t2 text, - k int - );CREATE OR REPLACE FUNCTION random_string( int ) RETURNS TEXT as $$ - SELECT string_agg(substring('0123456789bcdfghjkmnpqrstvwxyz', - round(random() * 30)::integer, 1), '') FROM generate_series(1, $1); - $$ language sql; - `) - s.NoError(err) - - connectionGen := FlowConnectionGenerationConfig{ - FlowJobName: "test_toast_bq_1", - TableNameMapping: map[string]string{"e2e_test.test_toast_bq_1": "test_toast_bq_1"}, - PostgresPort: postgresPort, - Destination: s.bqHelper.Peer, - } - - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - s.NoError(err) - - limits := peerflow.PeerFlowLimits{ - TotalSyncFlows: 1, - MaxBatchSize: 100, - } - - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup - // and execute a transaction touching toast columns - go func() { - s.SetupPeerFlowStatusQuery(env, connectionGen) - /* - Executing a transaction which - 1. changes both toast column - 2. changes no toast column - 2. 
changes 1 toast column - */ - _, err = s.pool.Exec(context.Background(), ` - BEGIN; - INSERT INTO e2e_test.test_toast_bq_1(t1,t2,k) SELECT random_string(9000),random_string(9000), - 1 FROM generate_series(1,2); - UPDATE e2e_test.test_toast_bq_1 SET k=102 WHERE id=1; - UPDATE e2e_test.test_toast_bq_1 SET t1='dummy' WHERE id=2; - END; - `) - s.NoError(err) - fmt.Println("Executed a transaction touching toast columns") - }() - - env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - - // Verify workflow completes without error - s.True(env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - // allow only continue as new error - s.Error(err) - s.Contains(err.Error(), "continue as new") - - s.compareTableContentsBQ("test_toast_bq_1", "id,t1,t2,k") - env.AssertExpectations(s.T()) -} - -func (s *E2EPeerFlowTestSuite) Test_Toast_Nochanges_BQ() { - env := s.NewTestWorkflowEnvironment() - registerWorkflowsAndActivities(env) - - _, err := s.pool.Exec(context.Background(), ` - - CREATE TABLE e2e_test.test_toast_bq_2 ( - id SERIAL PRIMARY KEY, - t1 text, - t2 text, - k int - );CREATE OR REPLACE FUNCTION random_string( int ) RETURNS TEXT as $$ - SELECT string_agg(substring('0123456789bcdfghjkmnpqrstvwxyz', - round(random() * 30)::integer, 1), '') FROM generate_series(1, $1); - $$ language sql; - `) - s.NoError(err) - - connectionGen := FlowConnectionGenerationConfig{ - FlowJobName: "test_toast_bq_2", - TableNameMapping: map[string]string{"e2e_test.test_toast_bq_2": "test_toast_bq_2"}, - PostgresPort: postgresPort, - Destination: s.bqHelper.Peer, - } - - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - s.NoError(err) - - limits := peerflow.PeerFlowLimits{ - TotalSyncFlows: 1, - MaxBatchSize: 100, - } - - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup - // and execute a transaction touching toast columns - go func() { - s.SetupPeerFlowStatusQuery(env, connectionGen) - /* transaction updating no rows */ - _, err = s.pool.Exec(context.Background(), ` - BEGIN; - UPDATE e2e_test.test_toast_bq_2 SET k=102 WHERE id=1; - UPDATE e2e_test.test_toast_bq_2 SET t1='dummy' WHERE id=2; - END; - `) - s.NoError(err) - fmt.Println("Executed a transaction touching toast columns") - }() - - env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - - // Verify workflow completes without error - s.True(env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - // allow only continue as new error - s.Error(err) - s.Contains(err.Error(), "continue as new") - - s.compareTableContentsBQ("test_toast_bq_2", "id,t1,t2,k") - env.AssertExpectations(s.T()) -} - -func (s *E2EPeerFlowTestSuite) Test_Toast_Advance_1_BQ() { - env := s.NewTestWorkflowEnvironment() - registerWorkflowsAndActivities(env) - - _, err := s.pool.Exec(context.Background(), ` - - CREATE TABLE e2e_test.test_toast_bq_3 ( - id SERIAL PRIMARY KEY, - t1 text, - t2 text, - k int - );CREATE OR REPLACE FUNCTION random_string( int ) RETURNS TEXT as $$ - SELECT string_agg(substring('0123456789bcdfghjkmnpqrstvwxyz', - round(random() * 30)::integer, 1), '') FROM generate_series(1, $1); - $$ language sql; - `) - s.NoError(err) - - connectionGen := FlowConnectionGenerationConfig{ - FlowJobName: "test_toast_bq_3", - TableNameMapping: map[string]string{"e2e_test.test_toast_bq_3": "test_toast_bq_3"}, - PostgresPort: postgresPort, - Destination: s.bqHelper.Peer, - } - - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - s.NoError(err) - - 
limits := peerflow.PeerFlowLimits{ - TotalSyncFlows: 1, - MaxBatchSize: 100, - } - - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup - // and execute a transaction touching toast columns - go func() { - s.SetupPeerFlowStatusQuery(env, connectionGen) - //complex transaction with random DMLs on a table with toast columns - _, err = s.pool.Exec(context.Background(), ` - BEGIN; - INSERT INTO e2e_test.test_toast_bq_3(t1,t2,k) SELECT random_string(9000),random_string(9000), - 1 FROM generate_series(1,2); - UPDATE e2e_test.test_toast_bq_3 SET k=102 WHERE id=1; - UPDATE e2e_test.test_toast_bq_3 SET t1='dummy' WHERE id=2; - UPDATE e2e_test.test_toast_bq_3 SET t2='dummy' WHERE id=2; - DELETE FROM e2e_test.test_toast_bq_3 WHERE id=1; - INSERT INTO e2e_test.test_toast_bq_3(t1,t2,k) SELECT random_string(9000),random_string(9000), - 1 FROM generate_series(1,2); - UPDATE e2e_test.test_toast_bq_3 SET k=1 WHERE id=1; - UPDATE e2e_test.test_toast_bq_3 SET t1='dummy1',t2='dummy2' WHERE id=1; - UPDATE e2e_test.test_toast_bq_3 SET t1='dummy3' WHERE id=3; - DELETE FROM e2e_test.test_toast_bq_3 WHERE id=2; - DELETE FROM e2e_test.test_toast_bq_3 WHERE id=3; - DELETE FROM e2e_test.test_toast_bq_3 WHERE id=2; - END; - `) - s.NoError(err) - fmt.Println("Executed a transaction touching toast columns") - }() - - env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - - // Verify workflow completes without error - s.True(env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - // allow only continue as new error - s.Error(err) - s.Contains(err.Error(), "continue as new") - - s.compareTableContentsBQ("test_toast_bq_3", "id,t1,t2,k") - env.AssertExpectations(s.T()) -} - -func (s *E2EPeerFlowTestSuite) SetupPeerFlowStatusQuery(env *testsuite.TestWorkflowEnvironment, - connectionGen FlowConnectionGenerationConfig) { - // wait for PeerFlowStatusQuery to finish setup - // sleep for 5 second to allow the workflow to start - time.Sleep(5 * time.Second) - for { - response, err := env.QueryWorkflow( - peerflow.PeerFlowStatusQuery, - connectionGen.FlowJobName, - ) - if err == nil { - var state peerflow.PeerFlowState - err = response.Get(&state) - s.NoError(err) - - if state.SetupComplete { - fmt.Println("query indicates setup is complete") - break - } - } else { - // log the error for informational purposes - fmt.Println(err) - } - time.Sleep(1 * time.Second) - } -} - -func (s *E2EPeerFlowTestSuite) Test_Toast_Advance_2_BQ() { - env := s.NewTestWorkflowEnvironment() - registerWorkflowsAndActivities(env) - - _, err := s.pool.Exec(context.Background(), ` - - CREATE TABLE e2e_test.test_toast_bq_4 ( - id SERIAL PRIMARY KEY, - t1 text, - k int - );CREATE OR REPLACE FUNCTION random_string( int ) RETURNS TEXT as $$ - SELECT string_agg(substring('0123456789bcdfghjkmnpqrstvwxyz', - round(random() * 30)::integer, 1), '') FROM generate_series(1, $1); - $$ language sql; - `) - s.NoError(err) - - connectionGen := FlowConnectionGenerationConfig{ - FlowJobName: "test_toast_bq_4", - TableNameMapping: map[string]string{"e2e_test.test_toast_bq_4": "test_toast_bq_4"}, - PostgresPort: postgresPort, - Destination: s.bqHelper.Peer, - } - - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - s.NoError(err) - - limits := peerflow.PeerFlowLimits{ - TotalSyncFlows: 1, - MaxBatchSize: 100, - } - - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup - // and execute a transaction touching toast columns - go func() { - s.SetupPeerFlowStatusQuery(env, 
connectionGen) - //complex transaction with random DMLs on a table with toast columns - _, err = s.pool.Exec(context.Background(), ` - BEGIN; - INSERT INTO e2e_test.test_toast_bq_4(t1,k) SELECT random_string(9000), - 1 FROM generate_series(1,1); - UPDATE e2e_test.test_toast_bq_4 SET t1=sub.t1 FROM (SELECT random_string(9000) t1 - FROM generate_series(1,1) ) sub WHERE id=1; - UPDATE e2e_test.test_toast_bq_4 SET k=2 WHERE id=1; - UPDATE e2e_test.test_toast_bq_4 SET k=3 WHERE id=1; - UPDATE e2e_test.test_toast_bq_4 SET t1=sub.t1 FROM (SELECT random_string(9000) t1 - FROM generate_series(1,1)) sub WHERE id=1; - UPDATE e2e_test.test_toast_bq_4 SET k=4 WHERE id=1; - END; - `) - s.NoError(err) - fmt.Println("Executed a transaction touching toast columns") - }() - - env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - - // Verify workflow completes without error - s.True(env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - // allow only continue as new error - s.Error(err) - s.Contains(err.Error(), "continue as new") - - s.compareTableContentsBQ("test_toast_bq_4", "id,t1,k") - env.AssertExpectations(s.T()) -} - -func (s *E2EPeerFlowTestSuite) Test_Toast_Advance_3_BQ() { - env := s.NewTestWorkflowEnvironment() - registerWorkflowsAndActivities(env) - - _, err := s.pool.Exec(context.Background(), ` - - CREATE TABLE e2e_test.test_toast_bq_5 ( - id SERIAL PRIMARY KEY, - t1 text, - t2 text, - k int - );CREATE OR REPLACE FUNCTION random_string( int ) RETURNS TEXT as $$ - SELECT string_agg(substring('0123456789bcdfghjkmnpqrstvwxyz', - round(random() * 30)::integer, 1), '') FROM generate_series(1, $1); - $$ language sql; - `) - s.NoError(err) - - connectionGen := FlowConnectionGenerationConfig{ - FlowJobName: "test_toast_bq_5", - TableNameMapping: map[string]string{"e2e_test.test_toast_bq_5": "test_toast_bq_5"}, - PostgresPort: postgresPort, - Destination: s.bqHelper.Peer, - } - - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - s.NoError(err) - - limits := peerflow.PeerFlowLimits{ - TotalSyncFlows: 1, - MaxBatchSize: 100, - } - - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup - // and execute a transaction touching toast columns - go func() { - s.SetupPeerFlowStatusQuery(env, connectionGen) - /* - transaction updating a single row - multiple times with changed/unchanged toast columns - */ - _, err = s.pool.Exec(context.Background(), ` - BEGIN; - INSERT INTO e2e_test.test_toast_bq_5(t1,t2,k) SELECT random_string(9000),random_string(9000), - 1 FROM generate_series(1,1); - UPDATE e2e_test.test_toast_bq_5 SET k=102 WHERE id=1; - UPDATE e2e_test.test_toast_bq_5 SET t1='dummy' WHERE id=1; - UPDATE e2e_test.test_toast_bq_5 SET t2='dummy' WHERE id=1; - END; - `) - s.NoError(err) - fmt.Println("Executed a transaction touching toast columns") - }() - - env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - - // Verify workflow completes without error - s.True(env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - // allow only continue as new error - s.Error(err) - s.Contains(err.Error(), "continue as new") - - s.compareTableContentsBQ("test_toast_bq_5", "id,t1,t2,k") - env.AssertExpectations(s.T()) -} - -func (s *E2EPeerFlowTestSuite) Test_Types_BQ() { - env := s.NewTestWorkflowEnvironment() - registerWorkflowsAndActivities(env) - - _, err := s.pool.Exec(context.Background(), ` - - CREATE TABLE e2e_test.test_types_bq(id serial PRIMARY KEY,c1 BIGINT,c2 BIT,c3 VARBIT,c4 BOOLEAN, - c6 
BYTEA,c7 CHARACTER,c8 varchar,c9 CIDR,c11 DATE,c12 FLOAT,c13 DOUBLE PRECISION,
- c14 INET,c15 INTEGER,c16 INTERVAL,c17 JSON,c18 JSONB,c21 MACADDR,c22 MONEY,
- c23 NUMERIC,c24 OID,c28 REAL,c29 SMALLINT,c30 SMALLSERIAL,c31 SERIAL,c32 TEXT,
- c33 TIMESTAMP,c34 TIMESTAMPTZ,c35 TIME, c36 TIMETZ,c37 TSQUERY,c38 TSVECTOR,
- c39 TXID_SNAPSHOT,c40 UUID,c41 XML, c42 INT[], c43 FLOAT[]);
- CREATE OR REPLACE FUNCTION random_bytea(bytea_length integer)
- RETURNS bytea AS $body$
- SELECT decode(string_agg(lpad(to_hex(width_bucket(random(), 0, 1, 256)-1),2,'0') ,''), 'hex')
- FROM generate_series(1, $1);
- $body$
- LANGUAGE 'sql'
- VOLATILE
- SET search_path = 'pg_catalog';
- `)
- s.NoError(err)
-
- connectionGen := FlowConnectionGenerationConfig{
- FlowJobName: "test_types_bq",
- TableNameMapping: map[string]string{"e2e_test.test_types_bq": "test_types_bq"},
- PostgresPort: postgresPort,
- Destination: s.bqHelper.Peer,
- }
-
- flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs()
- s.NoError(err)
-
- limits := peerflow.PeerFlowLimits{
-
- TotalSyncFlows: 1,
- MaxBatchSize: 100,
- }
-
- // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup
- // and then insert a row covering all supported column types
- go func() {
- s.SetupPeerFlowStatusQuery(env, connectionGen)
- /* test inserting various types*/
- _, err = s.pool.Exec(context.Background(), `
- INSERT INTO e2e_test.test_types_bq SELECT 2,2,b'1',b'101',
- true,random_bytea(32),'s','test','1.1.10.2'::cidr,
- CURRENT_DATE,1.23,1.234,'192.168.1.5'::inet,1,
- '5 years 2 months 29 days 1 minute 2 seconds 200 milliseconds 20000 microseconds'::interval,
- '{"sai":1}'::json,'{"sai":1}'::jsonb,'08:00:2b:01:02:03'::macaddr,
- 1.2,1.23,4::oid,1.23,1,1,1,'test',now(),now(),now()::time,now()::timetz,
- 'fat & rat'::tsquery,'a fat cat sat on a mat and ate a fat rat'::tsvector,
- txid_current_snapshot(),
- '66073c38-b8df-4bdb-bbca-1c97596b8940'::uuid,xmlcomment('hello'),
- ARRAY[10299301,2579827],
- ARRAY[0.0003, 8902.0092];
- `)
- s.NoError(err)
- fmt.Println("Executed an insert with all types")
- }()
-
- env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil)
-
- // Verify workflow completes without error
- s.True(env.IsWorkflowCompleted())
- err = env.GetWorkflowError()
-
- // allow only continue as new error
- s.Error(err)
- s.Contains(err.Error(), "continue as new")
-
- noNulls, err := s.bqHelper.CheckNull("test_types_bq", []string{"c41", "c1", "c2", "c3", "c4",
- "c6", "c39", "c40", "id", "c9", "c11", "c12", "c13", "c14", "c15", "c16", "c17", "c18",
- "c21", "c22", "c23", "c24", "c28", "c29", "c30", "c31", "c33", "c34", "c35", "c36",
- "c37", "c38", "c7", "c8", "c32", "c42", "c43"})
- if err != nil {
- fmt.Printf("error: %v\n", err)
- }
- // Make sure that there are no nulls
- s.Equal(noNulls, true)
-
- env.AssertExpectations(s.T())
-}
-
-func (s *E2EPeerFlowTestSuite) Test_Types_Avro_BQ() {
- env := s.NewTestWorkflowEnvironment()
- registerWorkflowsAndActivities(env)
-
- _, err := s.pool.Exec(context.Background(), `
-
- CREATE TABLE e2e_test.test_types_avro_bq(id serial PRIMARY KEY,c1 BIGINT,c2 BIT,c3 VARBIT,c4 BOOLEAN,
- c6 BYTEA,c7 CHARACTER,c8 varchar,c9 CIDR,c11 DATE,c12 FLOAT,c13 DOUBLE PRECISION,
- c14 INET,c15 INTEGER,c16 INTERVAL,c17 JSON,c18 JSONB,c21 MACADDR,c22 MONEY,
- c23 NUMERIC,c24 OID,c28 REAL,c29 SMALLINT,c30 SMALLSERIAL,c31 SERIAL,c32 TEXT,
- c33 TIMESTAMP,c34 TIMESTAMPTZ,c35 TIME, c36 TIMETZ,c37 TSQUERY,c38 TSVECTOR,
- c39 TXID_SNAPSHOT,c40 UUID,c41 XML, c42 INT[], c43 FLOAT[], c44 TEXT[]);
- CREATE OR REPLACE FUNCTION random_bytea(bytea_length integer)
- RETURNS bytea AS $body$
- SELECT decode(string_agg(lpad(to_hex(width_bucket(random(), 0, 1, 256)-1),2,'0') ,''), 'hex')
- FROM generate_series(1, $1);
- $body$
- LANGUAGE 'sql'
- VOLATILE
- SET search_path = 'pg_catalog';
- `)
- s.NoError(err)
-
- connectionGen := FlowConnectionGenerationConfig{
- FlowJobName: "test_types_avro_bq",
- TableNameMapping: map[string]string{"e2e_test.test_types_avro_bq": "test_types_avro_bq"},
- PostgresPort: postgresPort,
- Destination: s.bqHelper.Peer,
- CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO,
- CdcStagingPath: "peerdb_staging",
- }
-
- flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs()
- s.NoError(err)
-
- limits := peerflow.PeerFlowLimits{
-
- TotalSyncFlows: 1,
- MaxBatchSize: 100,
- }
-
- // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup
- // and then insert a row covering all supported column types
- go func() {
- s.SetupPeerFlowStatusQuery(env, connectionGen)
- /* test inserting various types*/
- _, err = s.pool.Exec(context.Background(), `
- INSERT INTO e2e_test.test_types_avro_bq SELECT 2,2,b'1',b'101',
- true,random_bytea(32),'s','test','1.1.10.2'::cidr,
- CURRENT_DATE,1.23,1.234,'192.168.1.5'::inet,1,
- '5 years 2 months 29 days 1 minute 2 seconds 200 milliseconds 20000 microseconds'::interval,
- '{"sai":1}'::json,'{"sai":1}'::jsonb,'08:00:2b:01:02:03'::macaddr,
- 1.2,1.23,4::oid,1.23,1,1,1,'test',now(),now(),now()::time,now()::timetz,
- 'fat & rat'::tsquery,'a fat cat sat on a mat and ate a fat rat'::tsvector,
- txid_current_snapshot(),
- '66073c38-b8df-4bdb-bbca-1c97596b8940'::uuid,xmlcomment('hello'),
- ARRAY[9301,239827],
- ARRAY[0.0003, 1039.0034],
- ARRAY['hello','bye'];
- `)
- s.NoError(err)
- fmt.Println("Executed an insert with all types")
- }()
-
- env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil)
-
- // Verify workflow completes without error
- s.True(env.IsWorkflowCompleted())
- err = env.GetWorkflowError()
-
- // allow only continue as new error
- s.Error(err)
- s.Contains(err.Error(), "continue as new")
-
- noNulls, err := s.bqHelper.CheckNull("test_types_avro_bq", []string{"c41", "c1", "c2", "c3", "c4",
- "c6", "c39", "c40", "id", "c9", "c11", "c12", "c13", "c14", "c15", "c16", "c17", "c18",
- "c21", "c22", "c23", "c24", "c28", "c29", "c30", "c31", "c33", "c34", "c35", "c36",
- "c37", "c38", "c7", "c8", "c32", "c42", "c43"})
- if err != nil {
- fmt.Printf("error: %v\n", err)
- }
- // Make sure that there are no nulls
- s.Equal(noNulls, true)
-
- env.AssertExpectations(s.T())
-}
-
-func (s *E2EPeerFlowTestSuite) Test_Simple_Flow_BQ_Avro_CDC() {
- env := s.NewTestWorkflowEnvironment()
- registerWorkflowsAndActivities(env)
-
- _, err := s.pool.Exec(context.Background(), `
- CREATE TABLE e2e_test.test_simple_flow_bq_avro_cdc (
- id SERIAL PRIMARY KEY,
- key TEXT NOT NULL,
- value TEXT NOT NULL
- );
- `)
- s.NoError(err)
- connectionGen := FlowConnectionGenerationConfig{
- FlowJobName: "test_simple_flow_bq_avro_cdc",
- TableNameMapping: map[string]string{"e2e_test.test_simple_flow_bq_avro_cdc": "test_simple_flow_bq_avro_cdc"},
- PostgresPort: postgresPort,
- Destination: s.bqHelper.Peer,
- CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO,
- CdcStagingPath: "peerdb_staging",
- }
-
- flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs()
- s.NoError(err)
-
- limits := peerflow.PeerFlowLimits{
- TotalSyncFlows: 2,
- MaxBatchSize: 100,
- }
-
- go func() {
- s.SetupPeerFlowStatusQuery(env, connectionGen)
- for i := 0; i < 10; i++ {
- testKey := fmt.Sprintf("test_key_%d", i)
- testValue := fmt.Sprintf("test_value_%d", i)
- _, err = s.pool.Exec(context.Background(), `
- INSERT INTO e2e_test.test_simple_flow_bq_avro_cdc (key, value) VALUES ($1, $2)
- `, testKey, testValue)
- s.NoError(err)
- }
- fmt.Println("Inserted 10 rows into the source table")
- }()
-
- env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil)
-
- // Verify workflow completes without error
- s.True(env.IsWorkflowCompleted())
- err = env.GetWorkflowError()
-
- // allow only continue as new error
- s.Error(err)
- s.Contains(err.Error(), "continue as new")
-
- count, err := s.bqHelper.CountRows("test_simple_flow_bq_avro_cdc")
- s.NoError(err)
- s.Equal(10, count)
-
- // TODO: verify that the data is correctly synced to the destination table
- // on the bigquery side
-
- env.AssertExpectations(s.T())
-}
-
-func (s *E2EPeerFlowTestSuite) Test_Multi_Table_BQ() {
- env := s.NewTestWorkflowEnvironment()
- registerWorkflowsAndActivities(env)
-
- _, err := s.pool.Exec(context.Background(), `
- CREATE TABLE e2e_test.test1_bq(id serial primary key, c1 int, c2 text);
- CREATE TABLE e2e_test.test2_bq(id serial primary key, c1 int, c2 text);
- `)
- s.NoError(err)
-
- connectionGen := FlowConnectionGenerationConfig{
- FlowJobName: "test_multi_table_bq",
- TableNameMapping: map[string]string{"e2e_test.test1_bq": "test1_bq", "e2e_test.test2_bq": "test2_bq"},
- PostgresPort: postgresPort,
- Destination: s.bqHelper.Peer,
- }
-
- flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs()
- s.NoError(err)
-
- limits := peerflow.PeerFlowLimits{
- TotalSyncFlows: 1,
- MaxBatchSize: 100,
- }
-
- // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup
- // and insert a row into each of the two tables
- go func() {
- s.SetupPeerFlowStatusQuery(env, connectionGen)
- /* inserting across multiple tables*/
- _, err = s.pool.Exec(context.Background(), `
- INSERT INTO e2e_test.test1_bq(c1,c2) VALUES (1,'dummy_1');
- INSERT INTO e2e_test.test2_bq(c1,c2) VALUES (-1,'dummy_-1');
- `)
- s.NoError(err)
- fmt.Println("Executed inserts on two tables")
- }()
-
- env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil)
-
- // Verify workflow completes without error
- require.True(s.T(), env.IsWorkflowCompleted())
- err = env.GetWorkflowError()
-
- count1, err := s.bqHelper.CountRows("test1_bq")
- s.NoError(err)
- count2, err := s.bqHelper.CountRows("test2_bq")
- s.NoError(err)
-
- s.Equal(1, count1)
- s.Equal(1, count2)
-
- env.AssertExpectations(s.T())
-}
-
-// tests for snowflake
-
-func (s *E2EPeerFlowTestSuite) Test_Complete_Simple_Flow_SF() {
- env := s.NewTestWorkflowEnvironment()
- registerWorkflowsAndActivities(env)
-
- _, err := s.pool.Exec(context.Background(), `
- CREATE TABLE e2e_test.test_simple_flow_sf (
- id SERIAL PRIMARY KEY,
- key TEXT NOT NULL,
- value TEXT NOT NULL
- );
- `)
- s.NoError(err)
- tableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_simple_flow_sf")
- connectionGen := FlowConnectionGenerationConfig{
- FlowJobName: "test_complete_single_col_flow_sf",
- TableNameMapping: map[string]string{"e2e_test.test_simple_flow_sf": tableName},
- PostgresPort: postgresPort,
- Destination: s.sfHelper.Peer,
- }
-
- flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs()
- s.NoError(err)
-
- limits := peerflow.PeerFlowLimits{
- TotalSyncFlows: 2,
- MaxBatchSize: 100,
- }
-
- // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup
- // and then insert 10 rows into the source table
- go func() {
- s.SetupPeerFlowStatusQuery(env, connectionGen)
- // insert 10 rows into the source table
- for i := 0; i < 10; i++ {
- testKey := fmt.Sprintf("test_key_%d", i)
- testValue := fmt.Sprintf("test_value_%d", i)
- _, err = s.pool.Exec(context.Background(), `
- INSERT INTO e2e_test.test_simple_flow_sf (key, value) VALUES ($1, $2)
- `, testKey, testValue)
- s.NoError(err)
- }
- fmt.Println("Inserted 10 rows into the source table")
- }()
-
- env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil)
-
- // Verify workflow completes without error
- s.True(env.IsWorkflowCompleted())
- err = env.GetWorkflowError()
-
- // allow only continue as new error
- s.Error(err)
- s.Contains(err.Error(), "continue as new")
-
- count, err := s.sfHelper.CountRows("test_simple_flow_sf")
- s.NoError(err)
- s.Equal(10, count)
-
- // TODO: verify that the data is correctly synced to the destination table
- // on the snowflake side
-
- env.AssertExpectations(s.T())
-}
-
-func (s *E2EPeerFlowTestSuite) Test_Complete_Simple_Flow_SF_Avro_CDC() {
- env := s.NewTestWorkflowEnvironment()
- registerWorkflowsAndActivities(env)
-
- _, err := s.pool.Exec(context.Background(), `
- CREATE TABLE e2e_test.test_simple_flow_sf_avro_cdc (
- id SERIAL PRIMARY KEY,
- key TEXT NOT NULL,
- value TEXT NOT NULL
- );
- `)
- s.NoError(err)
- tableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_simple_flow_sf_avro_cdc")
- connectionGen := FlowConnectionGenerationConfig{
- FlowJobName: "test_complete_single_col_flow_sf_avro_cdc",
- TableNameMapping: map[string]string{"e2e_test.test_simple_flow_sf_avro_cdc": tableName},
- PostgresPort: postgresPort,
- Destination: s.sfHelper.Peer,
- CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO,
- }
-
- flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs()
- s.NoError(err)
-
- limits := peerflow.PeerFlowLimits{
- TotalSyncFlows: 2,
- MaxBatchSize: 100,
- }
-
- // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup
- // and then insert 10 rows into the source table
- go func() {
- s.SetupPeerFlowStatusQuery(env, connectionGen)
- // insert 10 rows into the source table
- for i := 0; i < 10; i++ {
- testKey := fmt.Sprintf("test_key_%d", i)
- testValue := fmt.Sprintf("test_value_%d", i)
- _, err = s.pool.Exec(context.Background(), `
- INSERT INTO e2e_test.test_simple_flow_sf_avro_cdc (key, value) VALUES ($1, $2)
- `, testKey, testValue)
- s.NoError(err)
- }
- fmt.Println("Inserted 10 rows into the source table")
- }()
-
- env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil)
-
- // Verify workflow completes without error
- s.True(env.IsWorkflowCompleted())
- err = env.GetWorkflowError()
-
- // allow only continue as new error
- s.Error(err)
- s.Contains(err.Error(), "continue as new")
-
- count, err := s.sfHelper.CountRows("test_simple_flow_sf_avro_cdc")
- s.NoError(err)
- s.Equal(10, count)
-
- // TODO: verify that the data is correctly synced to the destination table
- // on the snowflake side
-
- env.AssertExpectations(s.T())
-}
-
-func (s *E2EPeerFlowTestSuite) Test_Toast_SF() {
- env := s.NewTestWorkflowEnvironment()
- registerWorkflowsAndActivities(env)
-
- _, err := s.pool.Exec(context.Background(), `
-
- CREATE TABLE e2e_test.test_toast_sf_1 (
- id SERIAL PRIMARY KEY,
- t1 text,
- t2 text,
- k int
- );CREATE OR REPLACE FUNCTION random_string( int ) RETURNS TEXT as $$
- SELECT
string_agg(substring('0123456789bcdfghjkmnpqrstvwxyz', - round(random() * 30)::integer, 1), '') FROM generate_series(1, $1); - $$ language sql; - `) - s.NoError(err) - - tableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_toast_sf_1") - connectionGen := FlowConnectionGenerationConfig{ - FlowJobName: "test_toast_sf_1", - TableNameMapping: map[string]string{"e2e_test.test_toast_sf_1": tableName}, - PostgresPort: postgresPort, - Destination: s.sfHelper.Peer, - } - - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - s.NoError(err) - - limits := peerflow.PeerFlowLimits{ - TotalSyncFlows: 1, - MaxBatchSize: 100, - } - - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup - // and execute a transaction touching toast columns - go func() { - s.SetupPeerFlowStatusQuery(env, connectionGen) - /* - Executing a transaction which - 1. changes both toast column - 2. changes no toast column - 2. changes 1 toast column - */ - _, err = s.pool.Exec(context.Background(), ` - BEGIN; - INSERT INTO e2e_test.test_toast_sf_1(t1,t2,k) SELECT random_string(9000),random_string(9000), - 1 FROM generate_series(1,2); - UPDATE e2e_test.test_toast_sf_1 SET k=102 WHERE id=1; - UPDATE e2e_test.test_toast_sf_1 SET t1='dummy' WHERE id=2; - END; - `) - s.NoError(err) - fmt.Println("Executed a transaction touching toast columns") - }() - - env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - - // Verify workflow completes without error - s.True(env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - // allow only continue as new error - s.Error(err) - s.Contains(err.Error(), "continue as new") - - s.compareTableContentsSF("test_toast_sf_1", `id,t1,t2,k`, false) - env.AssertExpectations(s.T()) -} - -func (s *E2EPeerFlowTestSuite) Test_Toast_Nochanges_SF() { - env := s.NewTestWorkflowEnvironment() - registerWorkflowsAndActivities(env) - - _, err := s.pool.Exec(context.Background(), ` - - CREATE TABLE e2e_test.test_toast_sf_2 ( - id SERIAL PRIMARY KEY, - t1 text, - t2 text, - k int - );CREATE OR REPLACE FUNCTION random_string( int ) RETURNS TEXT as $$ - SELECT string_agg(substring('0123456789bcdfghjkmnpqrstvwxyz', - round(random() * 30)::integer, 1), '') FROM generate_series(1, $1); - $$ language sql; - `) - s.NoError(err) - - tableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_toast_sf_2") - connectionGen := FlowConnectionGenerationConfig{ - FlowJobName: "test_toast_sf_2", - TableNameMapping: map[string]string{"e2e_test.test_toast_sf_2": tableName}, - PostgresPort: postgresPort, - Destination: s.sfHelper.Peer, - } - - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - s.NoError(err) - - limits := peerflow.PeerFlowLimits{ - TotalSyncFlows: 1, - MaxBatchSize: 100, - } - - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup - // and execute a transaction touching toast columns - go func() { - s.SetupPeerFlowStatusQuery(env, connectionGen) - /* transaction updating no rows */ - _, err = s.pool.Exec(context.Background(), ` - BEGIN; - UPDATE e2e_test.test_toast_sf_2 SET k=102 WHERE id=1; - UPDATE e2e_test.test_toast_sf_2 SET t1='dummy' WHERE id=2; - END; - `) - s.NoError(err) - fmt.Println("Executed a transaction touching toast columns") - }() - - env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - - // Verify workflow completes without error - s.True(env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - // allow only continue as new error 
- s.Error(err) - s.Contains(err.Error(), "continue as new") - - s.compareTableContentsSF("test_toast_sf_2", `id,t1,t2,k`, false) - env.AssertExpectations(s.T()) -} - -func (s *E2EPeerFlowTestSuite) Test_Toast_Advance_1_SF() { - env := s.NewTestWorkflowEnvironment() - registerWorkflowsAndActivities(env) - - _, err := s.pool.Exec(context.Background(), ` - - CREATE TABLE e2e_test.test_toast_sf_3 ( - id SERIAL PRIMARY KEY, - t1 text, - t2 text, - k int - );CREATE OR REPLACE FUNCTION random_string( int ) RETURNS TEXT as $$ - SELECT string_agg(substring('0123456789bcdfghjkmnpqrstvwxyz', - round(random() * 30)::integer, 1), '') FROM generate_series(1, $1); - $$ language sql; - `) - s.NoError(err) - - tableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_toast_sf_3") - connectionGen := FlowConnectionGenerationConfig{ - FlowJobName: "test_toast_sf_3", - TableNameMapping: map[string]string{"e2e_test.test_toast_sf_3": tableName}, - PostgresPort: postgresPort, - Destination: s.sfHelper.Peer, - } - - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - s.NoError(err) - - limits := peerflow.PeerFlowLimits{ - TotalSyncFlows: 2, - MaxBatchSize: 100, - } - - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup - // and execute a transaction touching toast columns - go func() { - s.SetupPeerFlowStatusQuery(env, connectionGen) - //complex transaction with random DMLs on a table with toast columns - _, err = s.pool.Exec(context.Background(), ` - BEGIN; - INSERT INTO e2e_test.test_toast_sf_3(t1,t2,k) SELECT random_string(9000),random_string(9000), - 1 FROM generate_series(1,2); - UPDATE e2e_test.test_toast_sf_3 SET k=102 WHERE id=1; - UPDATE e2e_test.test_toast_sf_3 SET t1='dummy' WHERE id=2; - UPDATE e2e_test.test_toast_sf_3 SET t2='dummy' WHERE id=2; - DELETE FROM e2e_test.test_toast_sf_3 WHERE id=1; - INSERT INTO e2e_test.test_toast_sf_3(t1,t2,k) SELECT random_string(9000),random_string(9000), - 1 FROM generate_series(1,2); - UPDATE e2e_test.test_toast_sf_3 SET k=1 WHERE id=1; - UPDATE e2e_test.test_toast_sf_3 SET t1='dummy1',t2='dummy2' WHERE id=1; - UPDATE e2e_test.test_toast_sf_3 SET t1='dummy3' WHERE id=3; - DELETE FROM e2e_test.test_toast_sf_3 WHERE id=2; - DELETE FROM e2e_test.test_toast_sf_3 WHERE id=3; - DELETE FROM e2e_test.test_toast_sf_3 WHERE id=2; - END; - `) - s.NoError(err) - fmt.Println("Executed a transaction touching toast columns") - }() - - env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - - // Verify workflow completes without error - s.True(env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - // allow only continue as new error - s.Error(err) - s.Contains(err.Error(), "continue as new") - - s.compareTableContentsSF("test_toast_sf_3", `id,t1,t2,k`, false) - env.AssertExpectations(s.T()) -} - -func (s *E2EPeerFlowTestSuite) Test_Toast_Advance_2_SF() { - env := s.NewTestWorkflowEnvironment() - registerWorkflowsAndActivities(env) - - _, err := s.pool.Exec(context.Background(), ` - - CREATE TABLE e2e_test.test_toast_sf_4 ( - id SERIAL PRIMARY KEY, - t1 text, - k int - );CREATE OR REPLACE FUNCTION random_string( int ) RETURNS TEXT as $$ - SELECT string_agg(substring('0123456789bcdfghjkmnpqrstvwxyz', - round(random() * 30)::integer, 1), '') FROM generate_series(1, $1); - $$ language sql; - `) - s.NoError(err) - - tableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_toast_sf_4") - connectionGen := FlowConnectionGenerationConfig{ - FlowJobName: "test_toast_sf_4", - TableNameMapping: 
map[string]string{"e2e_test.test_toast_sf_4": tableName}, - PostgresPort: postgresPort, - Destination: s.sfHelper.Peer, - } - - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - s.NoError(err) - - limits := peerflow.PeerFlowLimits{ - TotalSyncFlows: 1, - MaxBatchSize: 100, - } - - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup - // and execute a transaction touching toast columns - go func() { - s.SetupPeerFlowStatusQuery(env, connectionGen) - //complex transaction with random DMLs on a table with toast columns - _, err = s.pool.Exec(context.Background(), ` - BEGIN; - INSERT INTO e2e_test.test_toast_sf_4(t1,k) SELECT random_string(9000), - 1 FROM generate_series(1,1); - UPDATE e2e_test.test_toast_sf_4 SET t1=sub.t1 FROM (SELECT random_string(9000) t1 - FROM generate_series(1,1) ) sub WHERE id=1; - UPDATE e2e_test.test_toast_sf_4 SET k=2 WHERE id=1; - UPDATE e2e_test.test_toast_sf_4 SET k=3 WHERE id=1; - UPDATE e2e_test.test_toast_sf_4 SET t1=sub.t1 FROM (SELECT random_string(9000) t1 - FROM generate_series(1,1)) sub WHERE id=1; - UPDATE e2e_test.test_toast_sf_4 SET k=4 WHERE id=1; - END; - `) - s.NoError(err) - fmt.Println("Executed a transaction touching toast columns") - }() - - env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - - // Verify workflow completes without error - s.True(env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - // allow only continue as new error - s.Error(err) - s.Contains(err.Error(), "continue as new") - - s.compareTableContentsSF("test_toast_sf_4", `id,t1,k`, false) - env.AssertExpectations(s.T()) -} - -func (s *E2EPeerFlowTestSuite) Test_Toast_Advance_3_SF() { - env := s.NewTestWorkflowEnvironment() - registerWorkflowsAndActivities(env) - - _, err := s.pool.Exec(context.Background(), ` - - CREATE TABLE e2e_test.test_toast_sf_5 ( - id SERIAL PRIMARY KEY, - t1 text, - t2 text, - k int - );CREATE OR REPLACE FUNCTION random_string( int ) RETURNS TEXT as $$ - SELECT string_agg(substring('0123456789bcdfghjkmnpqrstvwxyz', - round(random() * 30)::integer, 1), '') FROM generate_series(1, $1); - $$ language sql; - `) - s.NoError(err) - - tableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_toast_sf_5") - connectionGen := FlowConnectionGenerationConfig{ - FlowJobName: "test_toast_sf_5", - TableNameMapping: map[string]string{"e2e_test.test_toast_sf_5": tableName}, - PostgresPort: postgresPort, - Destination: s.sfHelper.Peer, - } - - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - s.NoError(err) - - limits := peerflow.PeerFlowLimits{ - TotalSyncFlows: 1, - MaxBatchSize: 100, - } - - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup - // and execute a transaction touching toast columns - go func() { - s.SetupPeerFlowStatusQuery(env, connectionGen) - /* - transaction updating a single row - multiple times with changed/unchanged toast columns - */ - _, err = s.pool.Exec(context.Background(), ` - BEGIN; - INSERT INTO e2e_test.test_toast_sf_5(t1,t2,k) SELECT random_string(9000),random_string(9000), - 1 FROM generate_series(1,1); - UPDATE e2e_test.test_toast_sf_5 SET k=102 WHERE id=1; - UPDATE e2e_test.test_toast_sf_5 SET t1='dummy' WHERE id=1; - UPDATE e2e_test.test_toast_sf_5 SET t2='dummy' WHERE id=1; - END; - `) - s.NoError(err) - fmt.Println("Executed a transaction touching toast columns") - }() - - env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - - // Verify workflow completes 
without error - s.True(env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - // allow only continue as new error - s.Error(err) - s.Contains(err.Error(), "continue as new") - - s.compareTableContentsSF("test_toast_sf_5", `id,t1,t2,k`, false) - env.AssertExpectations(s.T()) -} - -func (s *E2EPeerFlowTestSuite) Test_Types_SF() { - env := s.NewTestWorkflowEnvironment() - registerWorkflowsAndActivities(env) - - _, err := s.pool.Exec(context.Background(), ` - - CREATE TABLE e2e_test.test_types_sf(id serial PRIMARY KEY,c1 BIGINT,c2 BIT,c3 VARBIT,c4 BOOLEAN, - c6 BYTEA,c7 CHARACTER,c8 varchar,c9 CIDR,c11 DATE,c12 FLOAT,c13 DOUBLE PRECISION, - c14 INET,c15 INTEGER,c16 INTERVAL,c17 JSON,c18 JSONB,c21 MACADDR,c22 MONEY, - c23 NUMERIC,c24 OID,c28 REAL,c29 SMALLINT,c30 SMALLSERIAL,c31 SERIAL,c32 TEXT, - c33 TIMESTAMP,c34 TIMESTAMPTZ,c35 TIME, c36 TIMETZ,c37 TSQUERY,c38 TSVECTOR, - c39 TXID_SNAPSHOT,c40 UUID,c41 XML); - CREATE OR REPLACE FUNCTION random_bytea(bytea_length integer) - RETURNS bytea AS $body$ - SELECT decode(string_agg(lpad(to_hex(width_bucket(random(), 0, 1, 256)-1),2,'0') ,''), 'hex') - FROM generate_series(1, $1); - $body$ - LANGUAGE 'sql' - VOLATILE - SET search_path = 'pg_catalog'; - `) - s.NoError(err) - - tableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_types_sf") - connectionGen := FlowConnectionGenerationConfig{ - FlowJobName: "test_types_sf", - TableNameMapping: map[string]string{"e2e_test.test_types_sf": tableName}, - PostgresPort: postgresPort, - Destination: s.sfHelper.Peer, - } - - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - s.NoError(err) - - limits := peerflow.PeerFlowLimits{ - TotalSyncFlows: 1, - MaxBatchSize: 100, - } - - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup - // and execute a transaction touching toast columns - go func() { - s.SetupPeerFlowStatusQuery(env, connectionGen) - /* test inserting various types*/ - _, err = s.pool.Exec(context.Background(), ` - INSERT INTO e2e_test.test_types_sf SELECT 2,2,b'1',b'101', - true,random_bytea(32),'s','test','1.1.10.2'::cidr, - CURRENT_DATE,1.23,1.234,'192.168.1.5'::inet,1, - '5 years 2 months 29 days 1 minute 2 seconds 200 milliseconds 20000 microseconds'::interval, - '{"sai":1}'::json,'{"sai":1}'::jsonb,'08:00:2b:01:02:03'::macaddr, - 1.2,1.23,4::oid,1.23,1,1,1,'test',now(),now(),now()::time,now()::timetz, - 'fat & rat'::tsquery,'a fat cat sat on a mat and ate a fat rat'::tsvector, - txid_current_snapshot(), - '66073c38-b8df-4bdb-bbca-1c97596b8940'::uuid,xmlcomment('hello'); - `) - s.NoError(err) - fmt.Println("Executed an insert with all types") - }() - - env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - - // Verify workflow completes without error - s.True(env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - // allow only continue as new error - s.Error(err) - s.Contains(err.Error(), "continue as new") - - noNulls, err := s.sfHelper.CheckNull("test_types_sf", []string{"c41", "c1", "c2", "c3", "c4", - "c6", "c39", "c40", "id", "c9", "c11", "c12", "c13", "c14", "c15", "c16", "c17", "c18", - "c21", "c22", "c23", "c24", "c28", "c29", "c30", "c31", "c33", "c34", "c35", "c36", - "c37", "c38", "c7", "c8", "c32"}) - if err != nil { - fmt.Println("error %w", err) - } - // Make sure that there are no nulls - s.Equal(noNulls, true) - - env.AssertExpectations(s.T()) -} - -func (s *E2EPeerFlowTestSuite) Test_Types_SF_Avro_CDC() { - env := s.NewTestWorkflowEnvironment() - 
registerWorkflowsAndActivities(env) - - _, err := s.pool.Exec(context.Background(), ` - - CREATE TABLE e2e_test.test_types_sf_avro_cdc(id serial PRIMARY KEY,c1 BIGINT,c2 BIT,c3 VARBIT,c4 BOOLEAN, - c6 BYTEA,c7 CHARACTER,c8 varchar,c9 CIDR,c11 DATE,c12 FLOAT,c13 DOUBLE PRECISION, - c14 INET,c15 INTEGER,c16 INTERVAL,c17 JSON,c18 JSONB,c21 MACADDR,c22 MONEY, - c23 NUMERIC,c24 OID,c28 REAL,c29 SMALLINT,c30 SMALLSERIAL,c31 SERIAL,c32 TEXT, - c33 TIMESTAMP,c34 TIMESTAMPTZ,c35 TIME, c36 TIMETZ,c37 TSQUERY,c38 TSVECTOR, - c39 TXID_SNAPSHOT,c40 UUID,c41 XML); - CREATE OR REPLACE FUNCTION random_bytea(bytea_length integer) - RETURNS bytea AS $body$ - SELECT decode(string_agg(lpad(to_hex(width_bucket(random(), 0, 1, 256)-1),2,'0') ,''), 'hex') - FROM generate_series(1, $1); - $body$ - LANGUAGE 'sql' - VOLATILE - SET search_path = 'pg_catalog'; - `) - s.NoError(err) - - tableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_types_sf_avro_cdc") - connectionGen := FlowConnectionGenerationConfig{ - FlowJobName: "test_types_sf", - TableNameMapping: map[string]string{"e2e_test.test_types_sf_avro_cdc": tableName}, - PostgresPort: postgresPort, - Destination: s.sfHelper.Peer, - CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, - } - - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - s.NoError(err) - - limits := peerflow.PeerFlowLimits{ - TotalSyncFlows: 1, - MaxBatchSize: 100, - } - - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup - // and execute a transaction touching toast columns - go func() { - s.SetupPeerFlowStatusQuery(env, connectionGen) - /* test inserting various types*/ - _, err = s.pool.Exec(context.Background(), ` - INSERT INTO e2e_test.test_types_sf_avro_cdc SELECT 2,2,b'1',b'101', - true,random_bytea(32),'s','test','1.1.10.2'::cidr, - CURRENT_DATE,1.23,1.234,'192.168.1.5'::inet,1, - '5 years 2 months 29 days 1 minute 2 seconds 200 milliseconds 20000 microseconds'::interval, - '{"sai":1}'::json,'{"sai":1}'::jsonb,'08:00:2b:01:02:03'::macaddr, - 1.2,1.23,4::oid,1.23,1,1,1,'test',now(),now(),now()::time,now()::timetz, - 'fat & rat'::tsquery,'a fat cat sat on a mat and ate a fat rat'::tsvector, - txid_current_snapshot(), - '66073c38-b8df-4bdb-bbca-1c97596b8940'::uuid,xmlcomment('hello'); - `) - s.NoError(err) - fmt.Println("Executed an insert with all types") - }() - - env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - - // Verify workflow completes without error - s.True(env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - // allow only continue as new error - s.Error(err) - s.Contains(err.Error(), "continue as new") - - noNulls, err := s.sfHelper.CheckNull("test_types_sf_avro_cdc", []string{"c41", "c1", "c2", "c3", "c4", - "c6", "c39", "c40", "id", "c9", "c11", "c12", "c13", "c14", "c15", "c16", "c17", "c18", - "c21", "c22", "c23", "c24", "c28", "c29", "c30", "c31", "c33", "c34", "c35", "c36", - "c37", "c38", "c7", "c8", "c32"}) - if err != nil { - fmt.Println("error %w", err) - } - // Make sure that there are no nulls - s.Equal(noNulls, true) - - env.AssertExpectations(s.T()) -} - -func (s *E2EPeerFlowTestSuite) Test_Multi_Table_SF() { - env := s.NewTestWorkflowEnvironment() - registerWorkflowsAndActivities(env) - - _, err := s.pool.Exec(context.Background(), ` - CREATE TABLE e2e_test.test1_sf(id serial primary key, c1 int, c2 text); - CREATE TABLE e2e_test.test2_sf(id serial primary key, c1 int, c2 text); - `) - s.NoError(err) - - table1 := 
fmt.Sprintf(s.sfHelper.testSchemaName + ".test1_sf") - table2 := fmt.Sprintf(s.sfHelper.testSchemaName + ".test2_sf") - connectionGen := FlowConnectionGenerationConfig{ - FlowJobName: "test_multi_table_sf", - TableNameMapping: map[string]string{"e2e_test.test1_sf": table1, "e2e_test.test2_sf": table2}, - PostgresPort: postgresPort, - Destination: s.sfHelper.Peer, - } - - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - s.NoError(err) - - limits := peerflow.PeerFlowLimits{ - TotalSyncFlows: 1, - MaxBatchSize: 100, - } - - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup - // and execute a transaction touching toast columns - go func() { - s.SetupPeerFlowStatusQuery(env, connectionGen) - /* inserting across multiple tables*/ - _, err = s.pool.Exec(context.Background(), ` - INSERT INTO e2e_test.test1_sf(c1,c2) VALUES (1,'dummy_1'); - INSERT INTO e2e_test.test2_sf(c1,c2) VALUES (-1,'dummy_-1'); - `) - s.NoError(err) - fmt.Println("Executed an insert with all types") - }() - - env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - - // Verify workflow completes without error - s.True(env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - count1, err := s.sfHelper.CountRows("test1_sf") - s.NoError(err) - count2, err := s.sfHelper.CountRows("test2_sf") - s.NoError(err) - - s.Equal(1, count1) - s.Equal(1, count2) - - env.AssertExpectations(s.T()) -} diff --git a/flow/e2e/postgres/qrep_flow_pg_test.go b/flow/e2e/postgres/qrep_flow_pg_test.go new file mode 100644 index 000000000..241659c22 --- /dev/null +++ b/flow/e2e/postgres/qrep_flow_pg_test.go @@ -0,0 +1,147 @@ +package e2e_postgres + +import ( + "context" + "fmt" + "testing" + + "github.com/PeerDB-io/peer-flow/e2e" + "github.com/PeerDB-io/peer-flow/generated/protos" + "github.com/jackc/pgx/v5/pgxpool" + "github.com/joho/godotenv" + log "github.com/sirupsen/logrus" + "github.com/stretchr/testify/suite" + "go.temporal.io/sdk/testsuite" +) + +const postgresSuffix = "postgres" + +type PeerFlowE2ETestSuitePG struct { + suite.Suite + testsuite.WorkflowTestSuite + + pool *pgxpool.Pool +} + +func TestPeerFlowE2ETestSuitePG(t *testing.T) { + suite.Run(t, new(PeerFlowE2ETestSuitePG)) +} + +// Implement SetupAllSuite interface to setup the test suite +func (s *PeerFlowE2ETestSuitePG) SetupSuite() { + err := godotenv.Load() + if err != nil { + // it's okay if the .env file is not present + // we will use the default values + log.Infof("Unable to load .env file, using default values from env") + } + + log.SetReportCaller(true) + + pool, err := e2e.SetupPostgres(postgresSuffix) + if err != nil { + s.Fail("failed to setup postgres", err) + } + s.pool = pool +} + +// Implement TearDownAllSuite interface to tear down the test suite +func (s *PeerFlowE2ETestSuitePG) TearDownSuite() { + err := e2e.TearDownPostgres(s.pool, postgresSuffix) + if err != nil { + s.Fail("failed to drop Postgres schema", err) + } +} + +func (s *PeerFlowE2ETestSuitePG) setupSourceTable(tableName string, rowCount int) { + err := e2e.CreateSourceTableQRep(s.pool, postgresSuffix, tableName) + s.NoError(err) + err = e2e.PopulateSourceTable(s.pool, postgresSuffix, tableName, rowCount) + s.NoError(err) +} + +func (s *PeerFlowE2ETestSuitePG) comparePGTables(srcSchemaQualified, dstSchemaQualified string) error { + // Execute the two EXCEPT queries + err := s.compareQuery(srcSchemaQualified, dstSchemaQualified) + if err != nil { + return err + } + + err = s.compareQuery(dstSchemaQualified, srcSchemaQualified) + 
if err != nil {
+ return err
+ }
+
+ // If no error is returned, then the contents of the two tables are the same
+ return nil
+}
+
+func (s *PeerFlowE2ETestSuitePG) compareQuery(schema1, schema2 string) error {
+ query := fmt.Sprintf("SELECT * FROM %s EXCEPT SELECT * FROM %s", schema1, schema2)
+ rows, _ := s.pool.Query(context.Background(), query)
+
+ defer rows.Close()
+ for rows.Next() {
+ values, err := rows.Values()
+ if err != nil {
+ return err
+ }
+
+ columns := rows.FieldDescriptions()
+
+ for i, value := range values {
+ fmt.Printf("%s: %v\n", columns[i].Name, value)
+ }
+ fmt.Println("---")
+ }
+
+ return rows.Err()
+}
+
+func (s *PeerFlowE2ETestSuitePG) Test_Complete_QRep_Flow_Multi_Insert_PG() {
+ env := s.NewTestWorkflowEnvironment()
+ e2e.RegisterWorkflowsAndActivities(env)
+
+ numRows := 10
+
+ srcTable := "test_qrep_flow_avro_pg_1"
+ s.setupSourceTable(srcTable, numRows)
+
+ dstTable := "test_qrep_flow_avro_pg_2"
+ s.NoError(e2e.CreateSourceTableQRep(s.pool, postgresSuffix, dstTable)) // the name is misleading, but this is the destination table
+
+ srcSchemaQualified := fmt.Sprintf("%s_%s.%s", "e2e_test", postgresSuffix, srcTable)
+ dstSchemaQualified := fmt.Sprintf("%s_%s.%s", "e2e_test", postgresSuffix, dstTable)
+
+ query := fmt.Sprintf("SELECT * FROM e2e_test_%s.%s WHERE updated_at BETWEEN {{.start}} AND {{.end}}",
+ postgresSuffix, srcTable)
+
+ postgresPeer := e2e.GeneratePostgresPeer(e2e.PostgresPort)
+
+ qrepConfig, err := e2e.CreateQRepWorkflowConfig(
+ "test_qrep_flow_avro_pg",
+ srcSchemaQualified,
+ dstSchemaQualified,
+ query,
+ protos.QRepSyncMode_QREP_SYNC_MODE_MULTI_INSERT,
+ postgresPeer,
+ "",
+ )
+ s.NoError(err)
+
+ e2e.RunQrepFlowWorkflow(env, qrepConfig)
+
+ // Verify workflow completes without error
+ s.True(env.IsWorkflowCompleted())
+
+ // assert that no error occurred while running the workflow
+ err = env.GetWorkflowError()
+ s.NoError(err)
+
+ err = s.comparePGTables(srcSchemaQualified, dstSchemaQualified)
+ if err != nil {
+ s.FailNow(err.Error())
+ }
+
+ env.AssertExpectations(s.T())
+}
diff --git a/flow/e2e/qrep_flow_test.go b/flow/e2e/qrep_flow_test.go
deleted file mode 100644
index c79157bcf..000000000
--- a/flow/e2e/qrep_flow_test.go
+++ /dev/null
@@ -1,625 +0,0 @@
-package e2e
-
-import (
- "context"
- "encoding/json"
- "fmt"
- "strings"
-
- connpostgres "github.com/PeerDB-io/peer-flow/connectors/postgres"
- "github.com/PeerDB-io/peer-flow/generated/protos"
- "github.com/PeerDB-io/peer-flow/model"
- "github.com/PeerDB-io/peer-flow/model/qvalue"
- peerflow "github.com/PeerDB-io/peer-flow/workflows"
- "github.com/google/uuid"
- "github.com/stretchr/testify/require"
- "go.temporal.io/sdk/testsuite"
-)
-
-func (s *E2EPeerFlowTestSuite) createSourceTable(tableName string) {
- tblFields := []string{
- "id UUID NOT NULL PRIMARY KEY",
- "card_id UUID",
- `"from" TIMESTAMP NOT NULL`,
- "price NUMERIC",
- "created_at TIMESTAMP NOT NULL",
- "updated_at TIMESTAMP NOT NULL",
- "transaction_hash BYTEA",
- "ownerable_type VARCHAR",
- "ownerable_id UUID",
- "user_nonce INTEGER",
- "transfer_type INTEGER DEFAULT 0 NOT NULL",
- "blockchain INTEGER NOT NULL",
- "deal_type VARCHAR",
- "deal_id UUID",
- "ethereum_transaction_id UUID",
- "ignore_price BOOLEAN DEFAULT false",
- "card_eth_value DOUBLE PRECISION",
- "paid_eth_price DOUBLE PRECISION",
- "card_bought_notified BOOLEAN DEFAULT false NOT NULL",
- "address NUMERIC",
- "account_id UUID",
- "asset_id NUMERIC NOT NULL",
- "status INTEGER",
- "transaction_id UUID",
- "settled_at TIMESTAMP",
- "reference_id VARCHAR",
- "settle_at TIMESTAMP", - "settlement_delay_reason INTEGER", - "f1 text[]", - "f2 bigint[]", - "f3 int[]", - "f4 varchar[]", - "f5 jsonb", - "f6 jsonb", - "f7 jsonb", - "f8 smallint", - } - - tblFieldStr := strings.Join(tblFields, ",") - - _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` - CREATE TABLE e2e_test.%s ( - %s - );`, tableName, tblFieldStr)) - s.NoError(err) - - fmt.Printf("created table on postgres: e2e_test.%s\n", tableName) -} - -func (s *E2EPeerFlowTestSuite) populateSourceTable(tableName string, rowCount int) { - var ids []string - var rows []string - for i := 0; i < rowCount-1; i++ { - id := uuid.New().String() - ids = append(ids, id) - row := fmt.Sprintf(` - ( - '%s', '%s', CURRENT_TIMESTAMP, 3.86487206688919, CURRENT_TIMESTAMP, - CURRENT_TIMESTAMP, E'\\\\xDEADBEEF', 'type1', '%s', - 1, 0, 1, 'dealType1', - '%s', '%s', false, 1.2345, - 1.2345, false, 12345, '%s', - 12345, 1, '%s', CURRENT_TIMESTAMP, 'refID', - CURRENT_TIMESTAMP, 1, ARRAY['text1', 'text2'], ARRAY[123, 456], ARRAY[789, 012], - ARRAY['varchar1', 'varchar2'], '{"key": 8.5}', - '[{"key1": "value1", "key2": "value2", "key3": "value3"}]', - '{"key": "value"}', 15 - )`, - id, uuid.New().String(), uuid.New().String(), - uuid.New().String(), uuid.New().String(), uuid.New().String(), uuid.New().String()) - rows = append(rows, row) - } - - _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` - INSERT INTO e2e_test.%s ( - id, card_id, "from", price, created_at, - updated_at, transaction_hash, ownerable_type, ownerable_id, - user_nonce, transfer_type, blockchain, deal_type, - deal_id, ethereum_transaction_id, ignore_price, card_eth_value, - paid_eth_price, card_bought_notified, address, account_id, - asset_id, status, transaction_id, settled_at, reference_id, - settle_at, settlement_delay_reason, f1, f2, f3, f4, f5, f6, f7, f8 - ) VALUES %s; - `, tableName, strings.Join(rows, ","))) - require.NoError(s.T(), err) - - // add a row where all the nullable fields are null - _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` - INSERT INTO e2e_test.%s ( - id, "from", created_at, updated_at, - transfer_type, blockchain, card_bought_notified, asset_id - ) VALUES ( - '%s', CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, - 0, 1, false, 12345 - ); - `, tableName, uuid.New().String())) - require.NoError(s.T(), err) - - // generate a 20 MB json and update id[0]'s col f5 to it - v := s.generate20MBJson() - _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` - UPDATE e2e_test.%s SET f5 = '%s' WHERE id = '%s'; - `, tableName, v, ids[0])) - require.NoError(s.T(), err) -} - -func (s *E2EPeerFlowTestSuite) generate20MBJson() []byte { - xn := make(map[string]interface{}) - for i := 0; i < 215000; i++ { - xn[uuid.New().String()] = uuid.New().String() - } - - v, err := json.Marshal(xn) - require.NoError(s.T(), err) - - return v -} - -func (s *E2EPeerFlowTestSuite) setupSourceTable(tableName string, rowCount int) { - s.createSourceTable(tableName) - s.populateSourceTable(tableName, rowCount) -} - -func getOwnersSchema() *model.QRecordSchema { - return &model.QRecordSchema{ - Fields: []*model.QField{ - {Name: "id", Type: qvalue.QValueKindString, Nullable: true}, - {Name: "card_id", Type: qvalue.QValueKindString, Nullable: true}, - {Name: "from", Type: qvalue.QValueKindTimestamp, Nullable: true}, - {Name: "price", Type: qvalue.QValueKindNumeric, Nullable: true}, - {Name: "created_at", Type: qvalue.QValueKindTimestamp, Nullable: true}, - {Name: "updated_at", Type: qvalue.QValueKindTimestamp, Nullable: true}, - 
{Name: "transaction_hash", Type: qvalue.QValueKindBytes, Nullable: true}, - {Name: "ownerable_type", Type: qvalue.QValueKindString, Nullable: true}, - {Name: "ownerable_id", Type: qvalue.QValueKindString, Nullable: true}, - {Name: "user_nonce", Type: qvalue.QValueKindInt64, Nullable: true}, - {Name: "transfer_type", Type: qvalue.QValueKindInt64, Nullable: true}, - {Name: "blockchain", Type: qvalue.QValueKindInt64, Nullable: true}, - {Name: "deal_type", Type: qvalue.QValueKindString, Nullable: true}, - {Name: "deal_id", Type: qvalue.QValueKindString, Nullable: true}, - {Name: "ethereum_transaction_id", Type: qvalue.QValueKindString, Nullable: true}, - {Name: "ignore_price", Type: qvalue.QValueKindBoolean, Nullable: true}, - {Name: "card_eth_value", Type: qvalue.QValueKindFloat64, Nullable: true}, - {Name: "paid_eth_price", Type: qvalue.QValueKindFloat64, Nullable: true}, - {Name: "card_bought_notified", Type: qvalue.QValueKindBoolean, Nullable: true}, - {Name: "address", Type: qvalue.QValueKindNumeric, Nullable: true}, - {Name: "account_id", Type: qvalue.QValueKindString, Nullable: true}, - {Name: "asset_id", Type: qvalue.QValueKindNumeric, Nullable: true}, - {Name: "status", Type: qvalue.QValueKindInt64, Nullable: true}, - {Name: "transaction_id", Type: qvalue.QValueKindString, Nullable: true}, - {Name: "settled_at", Type: qvalue.QValueKindTimestamp, Nullable: true}, - {Name: "reference_id", Type: qvalue.QValueKindString, Nullable: true}, - {Name: "settle_at", Type: qvalue.QValueKindTimestamp, Nullable: true}, - {Name: "settlement_delay_reason", Type: qvalue.QValueKindInt64, Nullable: true}, - {Name: "f1", Type: qvalue.QValueKindArrayString, Nullable: true}, - {Name: "f2", Type: qvalue.QValueKindArrayInt64, Nullable: true}, - {Name: "f3", Type: qvalue.QValueKindArrayInt32, Nullable: true}, - {Name: "f4", Type: qvalue.QValueKindArrayString, Nullable: true}, - {Name: "f5", Type: qvalue.QValueKindJSON, Nullable: true}, - {Name: "f6", Type: qvalue.QValueKindJSON, Nullable: true}, - {Name: "f7", Type: qvalue.QValueKindJSON, Nullable: true}, - {Name: "f8", Type: qvalue.QValueKindInt16, Nullable: true}, - }, - } -} - -func getOwnersSelectorString() string { - schema := getOwnersSchema() - var fields []string - for _, field := range schema.Fields { - // append quoted field name - fields = append(fields, fmt.Sprintf(`"%s"`, field.Name)) - } - return strings.Join(fields, ",") -} - -func (s *E2EPeerFlowTestSuite) setupBQDestinationTable(dstTable string) { - schema := getOwnersSchema() - err := s.bqHelper.CreateTable(dstTable, schema) - - // fail if table creation fails - require.NoError(s.T(), err) - - fmt.Printf("created table on bigquery: %s.%s. %v\n", s.bqHelper.Config.DatasetId, dstTable, err) -} - -func (s *E2EPeerFlowTestSuite) setupSFDestinationTable(dstTable string) { - schema := getOwnersSchema() - err := s.sfHelper.CreateTable(dstTable, schema) - - // fail if table creation fails - if err != nil { - s.FailNow("unable to create table on snowflake", err) - } - - fmt.Printf("created table on snowflake: %s.%s. 
%v\n", s.sfHelper.testSchemaName, dstTable, err) -} - -func (s *E2EPeerFlowTestSuite) createQRepWorkflowConfig( - flowJobName string, - sourceTable string, - dstTable string, - query string, - syncMode protos.QRepSyncMode, - dest *protos.Peer, - stagingPath string, -) *protos.QRepConfig { - connectionGen := QRepFlowConnectionGenerationConfig{ - FlowJobName: flowJobName, - WatermarkTable: sourceTable, - DestinationTableIdentifier: dstTable, - PostgresPort: postgresPort, - Destination: dest, - StagingPath: stagingPath, - } - - watermark := "updated_at" - - qrepConfig, err := connectionGen.GenerateQRepConfig(query, watermark, syncMode) - s.NoError(err) - - qrepConfig.InitialCopyOnly = true - - return qrepConfig -} - -func (s *E2EPeerFlowTestSuite) compareTableContentsBQ(tableName string, colsString string) { - // read rows from source table - pgQueryExecutor := connpostgres.NewQRepQueryExecutor(s.pool, context.Background(), "testflow", "testpart") - pgQueryExecutor.SetTestEnv(true) - - pgRows, err := pgQueryExecutor.ExecuteAndProcessQuery( - fmt.Sprintf("SELECT %s FROM e2e_test.%s ORDER BY id", colsString, tableName), - ) - s.NoError(err) - - // read rows from destination table - qualifiedTableName := fmt.Sprintf("`%s.%s`", s.bqHelper.Config.DatasetId, tableName) - bqRows, err := s.bqHelper.ExecuteAndProcessQuery( - fmt.Sprintf("SELECT %s FROM %s ORDER BY id", colsString, qualifiedTableName), - ) - s.NoError(err) - - s.True(pgRows.Equals(bqRows), "rows from source and destination tables are not equal") -} - -func (s *E2EPeerFlowTestSuite) compareTableContentsSF(tableName string, selector string, caseSensitive bool) { - // read rows from source table - pgQueryExecutor := connpostgres.NewQRepQueryExecutor(s.pool, context.Background(), "testflow", "testpart") - pgQueryExecutor.SetTestEnv(true) - pgRows, err := pgQueryExecutor.ExecuteAndProcessQuery( - fmt.Sprintf("SELECT %s FROM e2e_test.%s ORDER BY id", selector, tableName), - ) - require.NoError(s.T(), err) - - // read rows from destination table - qualifiedTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, tableName) - var sfSelQuery string - if caseSensitive { - sfSelQuery = fmt.Sprintf(`SELECT %s FROM %s ORDER BY "id"`, selector, qualifiedTableName) - } else { - sfSelQuery = fmt.Sprintf(`SELECT %s FROM %s ORDER BY id`, selector, qualifiedTableName) - } - fmt.Printf("running query on snowflake: %s\n", sfSelQuery) - - // sleep for 1 min for debugging - // time.Sleep(1 * time.Minute) - - sfRows, err := s.sfHelper.ExecuteAndProcessQuery(sfSelQuery) - require.NoError(s.T(), err) - - s.True(pgRows.Equals(sfRows), "rows from source and destination tables are not equal") -} - -func (s *E2EPeerFlowTestSuite) comparePGTables(srcSchemaQualified, dstSchemaQualified string) error { - // Execute the two EXCEPT queries - err := s.compareQuery(srcSchemaQualified, dstSchemaQualified) - if err != nil { - return err - } - - err = s.compareQuery(dstSchemaQualified, srcSchemaQualified) - if err != nil { - return err - } - - // If no error is returned, then the contents of the two tables are the same - return nil -} - -func (s *E2EPeerFlowTestSuite) compareQuery(schema1, schema2 string) error { - query := fmt.Sprintf("SELECT * FROM %s EXCEPT SELECT * FROM %s", schema1, schema2) - rows, _ := s.pool.Query(context.Background(), query) - - defer rows.Close() - for rows.Next() { - values, err := rows.Values() - if err != nil { - return err - } - - columns := rows.FieldDescriptions() - - for i, value := range values { - fmt.Printf("%s: %v\n", 
columns[i].Name, value) - } - fmt.Println("---") - } - - return rows.Err() -} - -// NOTE: Disabled due to large JSON tests being added: https://github.com/PeerDB-io/peerdb/issues/309 - -// Test_Complete_QRep_Flow tests a complete flow with data in the source table. -// The test inserts 10 rows into the source table and verifies that the data is -// // correctly synced to the destination table this runs a QRep Flow. -// func (s *E2EPeerFlowTestSuite) Test_Complete_QRep_Flow_Multi_Insert() { -// env := s.NewTestWorkflowEnvironment() -// registerWorkflowsAndActivities(env) - -// numRows := 10 - -// tblName := "test_qrep_flow_multi_insert" -// s.setupSourceTable(tblName, numRows) -// s.setupBQDestinationTable(tblName) - -// query := fmt.Sprintf("SELECT * FROM e2e_test.%s WHERE updated_at BETWEEN {{.start}} AND {{.end}}", tblName) - -// qrepConfig := s.createQRepWorkflowConfig("test_qrep_flow_mi", -// "e2e_test."+tblName, -// tblName, -// query, -// protos.QRepSyncMode_QREP_SYNC_MODE_MULTI_INSERT, -// s.bqHelper.Peer) -// runQrepFlowWorkflow(env, qrepConfig) - -// // Verify workflow completes without error -// s.True(env.IsWorkflowCompleted()) - -// // assert that error contains "invalid connection configs" -// err := env.GetWorkflowError() -// s.NoError(err) - -// count, err := s.bqHelper.CountRows(tblName) -// s.NoError(err) - -// s.Equal(numRows, count) - -// env.AssertExpectations(s.T()) -// } - -func (s *E2EPeerFlowTestSuite) Test_Complete_QRep_Flow_Avro() { - env := s.NewTestWorkflowEnvironment() - registerWorkflowsAndActivities(env) - - numRows := 10 - - tblName := "test_qrep_flow_avro" - s.setupSourceTable(tblName, numRows) - s.setupBQDestinationTable(tblName) - - query := fmt.Sprintf("SELECT * FROM e2e_test.%s WHERE updated_at BETWEEN {{.start}} AND {{.end}}", tblName) - - qrepConfig := s.createQRepWorkflowConfig( - "test_qrep_flow_avro", - "e2e_test."+tblName, - tblName, - query, - protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, - s.bqHelper.Peer, - "peerdb_staging") - runQrepFlowWorkflow(env, qrepConfig) - - // Verify workflow completes without error - s.True(env.IsWorkflowCompleted()) - - // assert that error contains "invalid connection configs" - err := env.GetWorkflowError() - s.NoError(err) - - s.compareTableContentsBQ(tblName, "*") - - env.AssertExpectations(s.T()) -} - -func (s *E2EPeerFlowTestSuite) Test_Complete_QRep_Flow_Avro_SF() { - env := s.NewTestWorkflowEnvironment() - registerWorkflowsAndActivities(env) - - numRows := 10 - - tblName := "test_qrep_flow_avro_sf" - s.setupSourceTable(tblName, numRows) - s.setupSFDestinationTable(tblName) - - dstSchemaQualified := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, tblName) - - query := fmt.Sprintf("SELECT * FROM e2e_test.%s WHERE updated_at BETWEEN {{.start}} AND {{.end}}", tblName) - - qrepConfig := s.createQRepWorkflowConfig( - "test_qrep_flow_avro_Sf", - "e2e_test."+tblName, - dstSchemaQualified, - query, - protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, - s.sfHelper.Peer, - "", - ) - - runQrepFlowWorkflow(env, qrepConfig) - - // Verify workflow completes without error - s.True(env.IsWorkflowCompleted()) - - // assert that error contains "invalid connection configs" - err := env.GetWorkflowError() - s.NoError(err) - - sel := getOwnersSelectorString() - s.compareTableContentsSF(tblName, sel, true) - - env.AssertExpectations(s.T()) -} - -func (s *E2EPeerFlowTestSuite) Test_Complete_QRep_Flow_Avro_SF_Upsert_Simple() { - env := s.NewTestWorkflowEnvironment() - registerWorkflowsAndActivities(env) - - numRows := 10 - - 
tblName := "test_qrep_flow_avro_sf_ups" - s.setupSourceTable(tblName, numRows) - s.setupSFDestinationTable(tblName) - - dstSchemaQualified := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, tblName) - - query := fmt.Sprintf("SELECT * FROM e2e_test.%s WHERE updated_at >= {{.start}} AND updated_at < {{.end}}", tblName) - - qrepConfig := s.createQRepWorkflowConfig( - "test_qrep_flow_avro_Sf", - "e2e_test."+tblName, - dstSchemaQualified, - query, - protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, - s.sfHelper.Peer, - "", - ) - qrepConfig.WriteMode = &protos.QRepWriteMode{ - WriteType: protos.QRepWriteType_QREP_WRITE_MODE_UPSERT, - UpsertKeyColumns: []string{"id"}, - } - - runQrepFlowWorkflow(env, qrepConfig) - - // Verify workflow completes without error - s.True(env.IsWorkflowCompleted()) - - // assert that error contains "invalid connection configs" - err := env.GetWorkflowError() - s.NoError(err) - - sel := getOwnersSelectorString() - s.compareTableContentsSF(tblName, sel, true) - - env.AssertExpectations(s.T()) -} - -func (s *E2EPeerFlowTestSuite) Test_Complete_QRep_Flow_Multi_Insert_PG() { - env := s.NewTestWorkflowEnvironment() - registerWorkflowsAndActivities(env) - - numRows := 10 - - srcTable := "test_qrep_flow_avro_pg_1" - s.setupSourceTable(srcTable, numRows) - - dstTable := "test_qrep_flow_avro_pg_2" - s.createSourceTable(dstTable) // the name is misleading, but this is the destination table - - srcSchemaQualified := fmt.Sprintf("%s.%s", "e2e_test", srcTable) - dstSchemaQualified := fmt.Sprintf("%s.%s", "e2e_test", dstTable) - - query := fmt.Sprintf("SELECT * FROM e2e_test.%s WHERE updated_at BETWEEN {{.start}} AND {{.end}}", srcTable) - - postgresPeer := GeneratePostgresPeer(postgresPort) - - qrepConfig := s.createQRepWorkflowConfig( - "test_qrep_flow_avro_pg", - srcSchemaQualified, - dstSchemaQualified, - query, - protos.QRepSyncMode_QREP_SYNC_MODE_MULTI_INSERT, - postgresPeer, - "", - ) - - runQrepFlowWorkflow(env, qrepConfig) - - // Verify workflow completes without error - s.True(env.IsWorkflowCompleted()) - - // assert that error contains "invalid connection configs" - err := env.GetWorkflowError() - s.NoError(err) - - err = s.comparePGTables(srcSchemaQualified, dstSchemaQualified) - if err != nil { - s.FailNow(err.Error()) - } - - env.AssertExpectations(s.T()) -} - -func (s *E2EPeerFlowTestSuite) Test_Complete_QRep_Flow_Avro_SF_S3() { - env := s.NewTestWorkflowEnvironment() - registerWorkflowsAndActivities(env) - - numRows := 10 - - tblName := "test_qrep_flow_avro_sf_s3" - s.setupSourceTable(tblName, numRows) - s.setupSFDestinationTable(tblName) - - dstSchemaQualified := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, tblName) - - query := fmt.Sprintf("SELECT * FROM e2e_test.%s WHERE updated_at >= {{.start}} AND updated_at < {{.end}}", tblName) - - qrepConfig := s.createQRepWorkflowConfig( - "test_qrep_flow_avro_sf", - "e2e_test."+tblName, - dstSchemaQualified, - query, - protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, - s.sfHelper.Peer, - "", - ) - qrepConfig.StagingPath = fmt.Sprintf("s3://peerdb-test-bucket/avro/%s", uuid.New()) - - runQrepFlowWorkflow(env, qrepConfig) - - // Verify workflow completes without error - s.True(env.IsWorkflowCompleted()) - - // assert that error contains "invalid connection configs" - err := env.GetWorkflowError() - s.NoError(err) - - sel := getOwnersSelectorString() - s.compareTableContentsSF(tblName, sel, true) - - env.AssertExpectations(s.T()) -} - -func (s *E2EPeerFlowTestSuite) Test_Complete_QRep_Flow_Avro_SF_S3_Integration() { - env := 
s.NewTestWorkflowEnvironment() - registerWorkflowsAndActivities(env) - - numRows := 10 - - tblName := "test_qrep_flow_avro_sf_s3_int" - s.setupSourceTable(tblName, numRows) - s.setupSFDestinationTable(tblName) - - dstSchemaQualified := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, tblName) - - query := fmt.Sprintf("SELECT * FROM e2e_test.%s WHERE updated_at >= {{.start}} AND updated_at < {{.end}}", tblName) - - sfPeer := s.sfHelper.Peer - sfPeer.GetSnowflakeConfig().S3Integration = "peerdb_s3_integration" - - qrepConfig := s.createQRepWorkflowConfig( - "test_qrep_flow_avro_sf_int", - "e2e_test."+tblName, - dstSchemaQualified, - query, - protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, - sfPeer, - "", - ) - qrepConfig.StagingPath = fmt.Sprintf("s3://peerdb-test-bucket/avro/%s", uuid.New()) - - runQrepFlowWorkflow(env, qrepConfig) - - // Verify workflow completes without error - s.True(env.IsWorkflowCompleted()) - - // assert that error contains "invalid connection configs" - err := env.GetWorkflowError() - s.NoError(err) - - sel := getOwnersSelectorString() - s.compareTableContentsSF(tblName, sel, true) - - env.AssertExpectations(s.T()) -} - -func runQrepFlowWorkflow(env *testsuite.TestWorkflowEnvironment, config *protos.QRepConfig) { - lastPartition := &protos.QRepPartition{ - PartitionId: "not-applicable-partition", - Range: nil, - } - numPartitionsProcessed := 0 - env.ExecuteWorkflow(peerflow.QRepFlowWorkflow, config, lastPartition, numPartitionsProcessed) -} diff --git a/flow/e2e/s3/qrep_flow_s3_test.go b/flow/e2e/s3/qrep_flow_s3_test.go new file mode 100644 index 000000000..e47d23592 --- /dev/null +++ b/flow/e2e/s3/qrep_flow_s3_test.go @@ -0,0 +1,183 @@ +package e2e_s3 + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/PeerDB-io/peer-flow/e2e" + "github.com/PeerDB-io/peer-flow/generated/protos" + "github.com/jackc/pgx/v5/pgxpool" + "github.com/joho/godotenv" + log "github.com/sirupsen/logrus" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + "go.temporal.io/sdk/testsuite" +) + +const s3Suffix = "s3" + +type PeerFlowE2ETestSuiteS3 struct { + suite.Suite + testsuite.WorkflowTestSuite + + pool *pgxpool.Pool + s3Helper *S3TestHelper +} + +func TestPeerFlowE2ETestSuiteS3(t *testing.T) { + suite.Run(t, new(PeerFlowE2ETestSuiteS3)) +} + +func (s *PeerFlowE2ETestSuiteS3) setupSourceTable(tableName string, rowCount int) { + err := e2e.CreateSourceTableQRep(s.pool, s3Suffix, tableName) + s.NoError(err) + err = e2e.PopulateSourceTable(s.pool, s3Suffix, tableName, rowCount) + s.NoError(err) +} + +func (s *PeerFlowE2ETestSuiteS3) setupS3() error { + helper, err := NewS3TestHelper() + if err != nil { + return err + } + + s.s3Helper = helper + return nil +} + +func (s *PeerFlowE2ETestSuiteS3) SetupSuite() { + err := godotenv.Load() + if err != nil { + // it's okay if the .env file is not present + // we will use the default values + log.Infof("Unable to load .env file, using default values from env") + } + + log.SetReportCaller(true) + + pool, err := e2e.SetupPostgres(s3Suffix) + if err != nil { + s.Fail("failed to setup postgres", err) + } + s.pool = pool + + err = s.setupS3() + if err != nil { + s.Fail("failed to setup S3", err) + } +} + +// Implement TearDownAllSuite interface to tear down the test suite +func (s *PeerFlowE2ETestSuiteS3) TearDownSuite() { + err := e2e.TearDownPostgres(s.pool, s3Suffix) + if err != nil { + s.Fail("failed to drop Postgres schema", err) + } + + if s.s3Helper != nil { + err = s.s3Helper.CleanUp() + if err != nil { + 
s.Fail("failed to clean up s3", err) + } + } +} + +func (s *PeerFlowE2ETestSuiteS3) Test_Complete_QRep_Flow_S3() { + if s.s3Helper == nil { + s.T().Skip("Skipping S3 test") + } + + env := s.NewTestWorkflowEnvironment() + e2e.RegisterWorkflowsAndActivities(env) + + jobName := "test_complete_flow_s3" + schemaQualifiedName := fmt.Sprintf("e2e_test_%s.%s", s3Suffix, jobName) + + s.setupSourceTable(jobName, 10) + query := fmt.Sprintf("SELECT * FROM %s WHERE updated_at >= {{.start}} AND updated_at < {{.end}}", + schemaQualifiedName) + qrepConfig, err := e2e.CreateQRepWorkflowConfig( + jobName, + schemaQualifiedName, + "e2e_dest_1", + query, + protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, + s.s3Helper.GetPeer(), + "stage", + ) + s.NoError(err) + qrepConfig.StagingPath = s.s3Helper.s3Config.Url + + e2e.RunQrepFlowWorkflow(env, qrepConfig) + + // Verify workflow completes without error + s.True(env.IsWorkflowCompleted()) + err = env.GetWorkflowError() + + s.NoError(err) + + // Verify destination has 1 file + // make context with timeout + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + files, err := s.s3Helper.ListAllFiles(ctx, jobName) + + require.NoError(s.T(), err) + + require.Equal(s.T(), 1, len(files)) + + env.AssertExpectations(s.T()) +} + +func (s *PeerFlowE2ETestSuiteS3) Test_Complete_QRep_Flow_S3_CTID() { + if s.s3Helper == nil { + s.T().Skip("Skipping S3 test") + } + + env := s.NewTestWorkflowEnvironment() + e2e.RegisterWorkflowsAndActivities(env) + + jobName := "test_complete_flow_s3_ctid" + schemaQualifiedName := fmt.Sprintf("e2e_test_%s.%s", s3Suffix, jobName) + + s.setupSourceTable(jobName, 20000) + query := fmt.Sprintf("SELECT * FROM %s WHERE ctid BETWEEN {{.start}} AND {{.end}}", schemaQualifiedName) + qrepConfig, err := e2e.CreateQRepWorkflowConfig( + jobName, + schemaQualifiedName, + "e2e_dest_ctid", + query, + protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, + s.s3Helper.GetPeer(), + "stage", + ) + s.NoError(err) + qrepConfig.StagingPath = s.s3Helper.s3Config.Url + qrepConfig.NumRowsPerPartition = 2000 + qrepConfig.InitialCopyOnly = true + qrepConfig.WatermarkColumn = "ctid" + + e2e.RunQrepFlowWorkflow(env, qrepConfig) + + // Verify workflow completes without error + s.True(env.IsWorkflowCompleted()) + err = env.GetWorkflowError() + + s.NoError(err) + + // Verify destination has 1 file + // make context with timeout + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + files, err := s.s3Helper.ListAllFiles(ctx, jobName) + + require.NoError(s.T(), err) + + require.Equal(s.T(), 10, len(files)) + + env.AssertExpectations(s.T()) +} diff --git a/flow/e2e/s3_helper.go b/flow/e2e/s3/s3_helper.go similarity index 99% rename from flow/e2e/s3_helper.go rename to flow/e2e/s3/s3_helper.go index 49307e624..9dbe958ee 100644 --- a/flow/e2e/s3_helper.go +++ b/flow/e2e/s3/s3_helper.go @@ -1,4 +1,4 @@ -package e2e +package e2e_s3 import ( "context" diff --git a/flow/e2e/snowflake/peer_flow_sf_test.go b/flow/e2e/snowflake/peer_flow_sf_test.go new file mode 100644 index 000000000..db5504696 --- /dev/null +++ b/flow/e2e/snowflake/peer_flow_sf_test.go @@ -0,0 +1,800 @@ +package e2e_snowflake + +import ( + "context" + "fmt" + "testing" + + "github.com/PeerDB-io/peer-flow/e2e" + "github.com/PeerDB-io/peer-flow/generated/protos" + peerflow "github.com/PeerDB-io/peer-flow/workflows" + "github.com/jackc/pgx/v5/pgxpool" + "github.com/joho/godotenv" + log "github.com/sirupsen/logrus" + 
"github.com/stretchr/testify/suite" + "go.temporal.io/sdk/testsuite" +) + +const snowflakeSuffix = "snowflake" + +type PeerFlowE2ETestSuiteSF struct { + suite.Suite + testsuite.WorkflowTestSuite + + pool *pgxpool.Pool + sfHelper *SnowflakeTestHelper +} + +func TestPeerFlowE2ETestSuiteSF(t *testing.T) { + suite.Run(t, new(PeerFlowE2ETestSuiteSF)) +} + +func (s *PeerFlowE2ETestSuiteSF) attachSchemaSuffix(tableName string) string { + return fmt.Sprintf("e2e_test_%s.%s", snowflakeSuffix, tableName) +} + +func (s *PeerFlowE2ETestSuiteSF) attachSuffix(input string) string { + return fmt.Sprintf("%s_%s", input, snowflakeSuffix) +} + +// setupSnowflake sets up the snowflake connection. +func (s *PeerFlowE2ETestSuiteSF) setupSnowflake() error { + sfHelper, err := NewSnowflakeTestHelper() + if err != nil { + return fmt.Errorf("failed to create snowflake helper: %w", err) + } + + s.sfHelper = sfHelper + + return nil +} + +func (s *PeerFlowE2ETestSuiteSF) SetupSuite() { + err := godotenv.Load() + if err != nil { + // it's okay if the .env file is not present + // we will use the default values + log.Infof("Unable to load .env file, using default values from env") + } + + log.SetReportCaller(true) + + pool, err := e2e.SetupPostgres(snowflakeSuffix) + if err != nil { + s.Fail("failed to setup postgres", err) + } + s.pool = pool + + err = s.setupSnowflake() + if err != nil { + s.Fail("failed to setup snowflake", err) + } +} + +// Implement TearDownAllSuite interface to tear down the test suite +func (s *PeerFlowE2ETestSuiteSF) TearDownSuite() { + err := e2e.TearDownPostgres(s.pool, snowflakeSuffix) + if err != nil { + s.Fail("failed to drop Postgres schema", err) + } + + if s.sfHelper != nil { + err = s.sfHelper.Cleanup() + if err != nil { + s.Fail("failed to clean up Snowflake", err) + } + } +} + +func (s *PeerFlowE2ETestSuiteSF) Test_Complete_Simple_Flow_SF() { + env := s.NewTestWorkflowEnvironment() + e2e.RegisterWorkflowsAndActivities(env) + + srcTableName := s.attachSchemaSuffix("test_simple_flow_sf") + dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_simple_flow_sf") + + _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` + CREATE TABLE %s ( + id SERIAL PRIMARY KEY, + key TEXT NOT NULL, + value TEXT NOT NULL + ); + `, srcTableName)) + s.NoError(err) + connectionGen := e2e.FlowConnectionGenerationConfig{ + FlowJobName: s.attachSuffix("test_simple_flow"), + TableNameMapping: map[string]string{srcTableName: dstTableName}, + PostgresPort: e2e.PostgresPort, + Destination: s.sfHelper.Peer, + } + + flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() + s.NoError(err) + + limits := peerflow.PeerFlowLimits{ + TotalSyncFlows: 2, + MaxBatchSize: 100, + } + + // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // and then insert 10 rows into the source table + go func() { + e2e.SetupPeerFlowStatusQuery(env, connectionGen) + // insert 10 rows into the source table + for i := 0; i < 10; i++ { + testKey := fmt.Sprintf("test_key_%d", i) + testValue := fmt.Sprintf("test_value_%d", i) + _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` + INSERT INTO %s (key, value) VALUES ($1, $2) + `, srcTableName), testKey, testValue) + s.NoError(err) + } + fmt.Println("Inserted 10 rows into the source table") + }() + + env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil) + + // Verify workflow completes without error + s.True(env.IsWorkflowCompleted()) + err = env.GetWorkflowError() + + // allow only continue as new error + 
s.Error(err)
+ s.Contains(err.Error(), "continue as new")
+
+ count, err := s.sfHelper.CountRows("test_simple_flow_sf")
+ s.NoError(err)
+ s.Equal(10, count)
+
+ // TODO: verify that the data is correctly synced to the destination table
+ // on the Snowflake side
+
+ env.AssertExpectations(s.T())
+}
+
+func (s *PeerFlowE2ETestSuiteSF) Test_Complete_Simple_Flow_SF_Avro_CDC() {
+ env := s.NewTestWorkflowEnvironment()
+ e2e.RegisterWorkflowsAndActivities(env)
+
+ srcTableName := s.attachSchemaSuffix("test_simple_flow_sf_avro_cdc")
+ dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_simple_flow_sf_avro_cdc")
+
+ _, err := s.pool.Exec(context.Background(), fmt.Sprintf(`
+ CREATE TABLE %s (
+ id SERIAL PRIMARY KEY,
+ key TEXT NOT NULL,
+ value TEXT NOT NULL
+ );
+ `, srcTableName))
+ s.NoError(err)
+
+ connectionGen := e2e.FlowConnectionGenerationConfig{
+ FlowJobName: s.attachSuffix("test_simple_flow_avro"),
+ TableNameMapping: map[string]string{srcTableName: dstTableName},
+ PostgresPort: e2e.PostgresPort,
+ Destination: s.sfHelper.Peer,
+ CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO,
+ }
+
+ flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs()
+ s.NoError(err)
+
+ limits := peerflow.PeerFlowLimits{
+ TotalSyncFlows: 2,
+ MaxBatchSize: 100,
+ }
+
+ // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup
+ // and then insert 10 rows into the source table
+ go func() {
+ e2e.SetupPeerFlowStatusQuery(env, connectionGen)
+ // insert 10 rows into the source table
+ for i := 0; i < 10; i++ {
+ testKey := fmt.Sprintf("test_key_%d", i)
+ testValue := fmt.Sprintf("test_value_%d", i)
+ _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`
+ INSERT INTO %s (key, value) VALUES ($1, $2)
+ `, srcTableName), testKey, testValue)
+ s.NoError(err)
+ }
+ fmt.Println("Inserted 10 rows into the source table")
+ }()
+
+ env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil)
+
+ // Verify workflow completes without error
+ s.True(env.IsWorkflowCompleted())
+ err = env.GetWorkflowError()
+
+ // allow only continue as new error
+ s.Error(err)
+ s.Contains(err.Error(), "continue as new")
+
+ count, err := s.sfHelper.CountRows("test_simple_flow_sf_avro_cdc")
+ s.NoError(err)
+ s.Equal(10, count)
+
+ // TODO: verify that the data is correctly synced to the destination table
+ // on the Snowflake side
+
+ env.AssertExpectations(s.T())
+}
+
+func (s *PeerFlowE2ETestSuiteSF) Test_Toast_SF() {
+ env := s.NewTestWorkflowEnvironment()
+ e2e.RegisterWorkflowsAndActivities(env)
+
+ srcTableName := s.attachSchemaSuffix("test_toast_sf_1")
+ dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_toast_sf_1")
+
+ _, err := s.pool.Exec(context.Background(), fmt.Sprintf(`
+ CREATE TABLE %s (
+ id SERIAL PRIMARY KEY,
+ t1 text,
+ t2 text,
+ k int
+ );CREATE OR REPLACE FUNCTION random_string( int ) RETURNS TEXT as $$
+ SELECT string_agg(substring('0123456789bcdfghjkmnpqrstvwxyz',
+ round(random() * 30)::integer, 1), '') FROM generate_series(1, $1);
+ $$ language sql;
+ `, srcTableName))
+ s.NoError(err)
+
+ connectionGen := e2e.FlowConnectionGenerationConfig{
+ FlowJobName: s.attachSuffix("test_toast_sf_1"),
+ TableNameMapping: map[string]string{srcTableName: dstTableName},
+ PostgresPort: e2e.PostgresPort,
+ Destination: s.sfHelper.Peer,
+ }
+
+ flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs()
+ s.NoError(err)
+
+ limits := peerflow.PeerFlowLimits{
+ TotalSyncFlows: 1,
+ MaxBatchSize: 100,
+ }
+
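+ // The random_string(9000) values above are what make this a TOAST test:
+ // they are large enough for Postgres to store them out-of-line (TOASTed),
+ // and logical decoding omits unchanged TOASTed columns from UPDATE records,
+ // so the sync has to carry the old value forward rather than nulling it out.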
+ // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup
+ // and execute a transaction touching toast columns
+ go func() {
+ e2e.SetupPeerFlowStatusQuery(env, connectionGen)
+ /*
+ Executing a transaction which
+ 1. changes both toast columns
+ 2. changes no toast column
+ 3. changes 1 toast column
+ */
+ _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`
+ BEGIN;
+ INSERT INTO %s (t1,t2,k) SELECT random_string(9000),random_string(9000),
+ 1 FROM generate_series(1,2);
+ UPDATE %s SET k=102 WHERE id=1;
+ UPDATE %s SET t1='dummy' WHERE id=2;
+ END;
+ `, srcTableName, srcTableName, srcTableName))
+ s.NoError(err)
+ fmt.Println("Executed a transaction touching toast columns")
+ }()
+
+ env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil)
+
+ // Verify workflow completes without error
+ s.True(env.IsWorkflowCompleted())
+ err = env.GetWorkflowError()
+
+ // allow only continue as new error
+ s.Error(err)
+ s.Contains(err.Error(), "continue as new")
+
+ s.compareTableContentsSF("test_toast_sf_1", `id,t1,t2,k`, false)
+ env.AssertExpectations(s.T())
+}
+
+func (s *PeerFlowE2ETestSuiteSF) Test_Toast_Nochanges_SF() {
+ env := s.NewTestWorkflowEnvironment()
+ e2e.RegisterWorkflowsAndActivities(env)
+
+ srcTableName := s.attachSchemaSuffix("test_toast_sf_2")
+ dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_toast_sf_2")
+
+ _, err := s.pool.Exec(context.Background(), fmt.Sprintf(`
+ CREATE TABLE %s (
+ id SERIAL PRIMARY KEY,
+ t1 text,
+ t2 text,
+ k int
+ );CREATE OR REPLACE FUNCTION random_string( int ) RETURNS TEXT as $$
+ SELECT string_agg(substring('0123456789bcdfghjkmnpqrstvwxyz',
+ round(random() * 30)::integer, 1), '') FROM generate_series(1, $1);
+ $$ language sql;
+ `, srcTableName))
+ s.NoError(err)
+
+ connectionGen := e2e.FlowConnectionGenerationConfig{
+ FlowJobName: s.attachSuffix("test_toast_sf_2"),
+ TableNameMapping: map[string]string{srcTableName: dstTableName},
+ PostgresPort: e2e.PostgresPort,
+ Destination: s.sfHelper.Peer,
+ }
+
+ flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs()
+ s.NoError(err)
+
+ limits := peerflow.PeerFlowLimits{
+ TotalSyncFlows: 1,
+ MaxBatchSize: 100,
+ }
+
+ // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup
+ // and execute a transaction touching toast columns
+ go func() {
+ e2e.SetupPeerFlowStatusQuery(env, connectionGen)
+ /* transaction touching toast columns but matching no rows, as the table is empty */
+ _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`
+ BEGIN;
+ UPDATE %s SET k=102 WHERE id=1;
+ UPDATE %s SET t1='dummy' WHERE id=2;
+ END;
+ `, srcTableName, srcTableName))
+ s.NoError(err)
+ fmt.Println("Executed a transaction touching toast columns")
+ }()
+
+ env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil)
+
+ // Verify workflow completes without error
+ s.True(env.IsWorkflowCompleted())
+ err = env.GetWorkflowError()
+
+ // allow only continue as new error
+ s.Error(err)
+ s.Contains(err.Error(), "continue as new")
+
+ s.compareTableContentsSF("test_toast_sf_2", `id,t1,t2,k`, false)
+ env.AssertExpectations(s.T())
+}
+
+func (s *PeerFlowE2ETestSuiteSF) Test_Toast_Advance_1_SF() {
+ env := s.NewTestWorkflowEnvironment()
+ e2e.RegisterWorkflowsAndActivities(env)
+
+ srcTableName := s.attachSchemaSuffix("test_toast_sf_3")
+ dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_toast_sf_3")
+
+ _, err := s.pool.Exec(context.Background(), fmt.Sprintf(`
+ CREATE TABLE %s (
+ id SERIAL PRIMARY KEY,
+ t1 text,
+ t2 text,
+ k int + );CREATE OR REPLACE FUNCTION random_string( int ) RETURNS TEXT as $$ + SELECT string_agg(substring('0123456789bcdfghjkmnpqrstvwxyz', + round(random() * 30)::integer, 1), '') FROM generate_series(1, $1); + $$ language sql; + `, srcTableName)) + s.NoError(err) + + connectionGen := e2e.FlowConnectionGenerationConfig{ + FlowJobName: s.attachSuffix("test_toast_sf_3"), + TableNameMapping: map[string]string{srcTableName: dstTableName}, + PostgresPort: e2e.PostgresPort, + Destination: s.sfHelper.Peer, + } + + flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() + s.NoError(err) + + limits := peerflow.PeerFlowLimits{ + TotalSyncFlows: 2, + MaxBatchSize: 100, + } + + // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // and execute a transaction touching toast columns + go func() { + e2e.SetupPeerFlowStatusQuery(env, connectionGen) + //complex transaction with random DMLs on a table with toast columns + _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` + BEGIN; + INSERT INTO %s (t1,t2,k) SELECT random_string(9000),random_string(9000), + 1 FROM generate_series(1,2); + UPDATE %s SET k=102 WHERE id=1; + UPDATE %s SET t1='dummy' WHERE id=2; + UPDATE %s SET t2='dummy' WHERE id=2; + DELETE FROM %s WHERE id=1; + INSERT INTO %s(t1,t2,k) SELECT random_string(9000),random_string(9000), + 1 FROM generate_series(1,2); + UPDATE %s SET k=1 WHERE id=1; + UPDATE %s SET t1='dummy1',t2='dummy2' WHERE id=1; + UPDATE %s SET t1='dummy3' WHERE id=3; + DELETE FROM %s WHERE id=2; + DELETE FROM %s WHERE id=3; + DELETE FROM %s WHERE id=2; + END; + `, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, + srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName)) + s.NoError(err) + fmt.Println("Executed a transaction touching toast columns") + }() + + env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil) + + // Verify workflow completes without error + s.True(env.IsWorkflowCompleted()) + err = env.GetWorkflowError() + + // allow only continue as new error + s.Error(err) + s.Contains(err.Error(), "continue as new") + + s.compareTableContentsSF("test_toast_sf_3", `id,t1,t2,k`, false) + env.AssertExpectations(s.T()) +} + +func (s *PeerFlowE2ETestSuiteSF) Test_Toast_Advance_2_SF() { + env := s.NewTestWorkflowEnvironment() + e2e.RegisterWorkflowsAndActivities(env) + + srcTableName := s.attachSchemaSuffix("test_toast_sf_4") + dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_toast_sf_4") + + _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` + CREATE TABLE %s ( + id SERIAL PRIMARY KEY, + t1 text, + k int + );CREATE OR REPLACE FUNCTION random_string( int ) RETURNS TEXT as $$ + SELECT string_agg(substring('0123456789bcdfghjkmnpqrstvwxyz', + round(random() * 30)::integer, 1), '') FROM generate_series(1, $1); + $$ language sql; + `, srcTableName)) + s.NoError(err) + + connectionGen := e2e.FlowConnectionGenerationConfig{ + FlowJobName: s.attachSuffix("test_toast_sf_4"), + TableNameMapping: map[string]string{srcTableName: dstTableName}, + PostgresPort: e2e.PostgresPort, + Destination: s.sfHelper.Peer, + } + + flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() + s.NoError(err) + + limits := peerflow.PeerFlowLimits{ + TotalSyncFlows: 1, + MaxBatchSize: 100, + } + + // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // and execute a transaction touching toast columns + go func() { + e2e.SetupPeerFlowStatusQuery(env, 
connectionGen) + //complex transaction with random DMLs on a table with toast columns + _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` + BEGIN; + INSERT INTO %s (t1,k) SELECT random_string(9000), + 1 FROM generate_series(1,1); + UPDATE %s SET t1=sub.t1 FROM (SELECT random_string(9000) t1 + FROM generate_series(1,1) ) sub WHERE id=1; + UPDATE %s SET k=2 WHERE id=1; + UPDATE %s SET k=3 WHERE id=1; + UPDATE %s SET t1=sub.t1 FROM (SELECT random_string(9000) t1 + FROM generate_series(1,1)) sub WHERE id=1; + UPDATE %s SET k=4 WHERE id=1; + END; + `, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName)) + s.NoError(err) + fmt.Println("Executed a transaction touching toast columns") + }() + + env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil) + + // Verify workflow completes without error + s.True(env.IsWorkflowCompleted()) + err = env.GetWorkflowError() + + // allow only continue as new error + s.Error(err) + s.Contains(err.Error(), "continue as new") + + s.compareTableContentsSF("test_toast_sf_4", `id,t1,k`, false) + env.AssertExpectations(s.T()) +} + +func (s *PeerFlowE2ETestSuiteSF) Test_Toast_Advance_3_SF() { + env := s.NewTestWorkflowEnvironment() + e2e.RegisterWorkflowsAndActivities(env) + + srcTableName := s.attachSchemaSuffix("test_toast_sf_5") + dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_toast_sf_5") + + _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` + CREATE TABLE %s ( + id SERIAL PRIMARY KEY, + t1 text, + t2 text, + k int + );CREATE OR REPLACE FUNCTION random_string( int ) RETURNS TEXT as $$ + SELECT string_agg(substring('0123456789bcdfghjkmnpqrstvwxyz', + round(random() * 30)::integer, 1), '') FROM generate_series(1, $1); + $$ language sql; + `, srcTableName)) + s.NoError(err) + + connectionGen := e2e.FlowConnectionGenerationConfig{ + FlowJobName: s.attachSuffix("test_toast_sf_5"), + TableNameMapping: map[string]string{srcTableName: dstTableName}, + PostgresPort: e2e.PostgresPort, + Destination: s.sfHelper.Peer, + } + + flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() + s.NoError(err) + + limits := peerflow.PeerFlowLimits{ + TotalSyncFlows: 1, + MaxBatchSize: 100, + } + + // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // and execute a transaction touching toast columns + go func() { + e2e.SetupPeerFlowStatusQuery(env, connectionGen) + /* + transaction updating a single row + multiple times with changed/unchanged toast columns + */ + _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` + BEGIN; + INSERT INTO %s (t1,t2,k) SELECT random_string(9000),random_string(9000), + 1 FROM generate_series(1,1); + UPDATE %s SET k=102 WHERE id=1; + UPDATE %s SET t1='dummy' WHERE id=1; + UPDATE %s SET t2='dummy' WHERE id=1; + END; + `, srcTableName, srcTableName, srcTableName, srcTableName)) + s.NoError(err) + fmt.Println("Executed a transaction touching toast columns") + }() + + env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil) + + // Verify workflow completes without error + s.True(env.IsWorkflowCompleted()) + err = env.GetWorkflowError() + + // allow only continue as new error + s.Error(err) + s.Contains(err.Error(), "continue as new") + + s.compareTableContentsSF("test_toast_sf_5", `id,t1,t2,k`, false) + env.AssertExpectations(s.T()) +} + +func (s *PeerFlowE2ETestSuiteSF) Test_Types_SF() { + env := s.NewTestWorkflowEnvironment() + e2e.RegisterWorkflowsAndActivities(env) + + srcTableName := 
s.attachSchemaSuffix("test_types_sf") + dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_types_sf") + + _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` + CREATE TABLE %s (id serial PRIMARY KEY,c1 BIGINT,c2 BIT,c3 VARBIT,c4 BOOLEAN, + c6 BYTEA,c7 CHARACTER,c8 varchar,c9 CIDR,c11 DATE,c12 FLOAT,c13 DOUBLE PRECISION, + c14 INET,c15 INTEGER,c16 INTERVAL,c17 JSON,c18 JSONB,c21 MACADDR,c22 MONEY, + c23 NUMERIC,c24 OID,c28 REAL,c29 SMALLINT,c30 SMALLSERIAL,c31 SERIAL,c32 TEXT, + c33 TIMESTAMP,c34 TIMESTAMPTZ,c35 TIME, c36 TIMETZ,c37 TSQUERY,c38 TSVECTOR, + c39 TXID_SNAPSHOT,c40 UUID,c41 XML); + CREATE OR REPLACE FUNCTION random_bytea(bytea_length integer) + RETURNS bytea AS $body$ + SELECT decode(string_agg(lpad(to_hex(width_bucket(random(), 0, 1, 256)-1),2,'0') ,''), 'hex') + FROM generate_series(1, $1); + $body$ + LANGUAGE 'sql' + VOLATILE + SET search_path = 'pg_catalog'; + `, srcTableName)) + s.NoError(err) + + connectionGen := e2e.FlowConnectionGenerationConfig{ + FlowJobName: s.attachSuffix("test_types_sf"), + TableNameMapping: map[string]string{srcTableName: dstTableName}, + PostgresPort: e2e.PostgresPort, + Destination: s.sfHelper.Peer, + } + + flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() + s.NoError(err) + + limits := peerflow.PeerFlowLimits{ + TotalSyncFlows: 1, + MaxBatchSize: 100, + } + + // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup + // and execute a transaction touching toast columns + go func() { + e2e.SetupPeerFlowStatusQuery(env, connectionGen) + /* test inserting various types*/ + _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` + INSERT INTO %s SELECT 2,2,b'1',b'101', + true,random_bytea(32),'s','test','1.1.10.2'::cidr, + CURRENT_DATE,1.23,1.234,'192.168.1.5'::inet,1, + '5 years 2 months 29 days 1 minute 2 seconds 200 milliseconds 20000 microseconds'::interval, + '{"sai":1}'::json,'{"sai":1}'::jsonb,'08:00:2b:01:02:03'::macaddr, + 1.2,1.23,4::oid,1.23,1,1,1,'test',now(),now(),now()::time,now()::timetz, + 'fat & rat'::tsquery,'a fat cat sat on a mat and ate a fat rat'::tsvector, + txid_current_snapshot(), + '66073c38-b8df-4bdb-bbca-1c97596b8940'::uuid,xmlcomment('hello'); + `, srcTableName)) + s.NoError(err) + fmt.Println("Executed an insert with all types") + }() + + env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil) + + // Verify workflow completes without error + s.True(env.IsWorkflowCompleted()) + err = env.GetWorkflowError() + + // allow only continue as new error + s.Error(err) + s.Contains(err.Error(), "continue as new") + + noNulls, err := s.sfHelper.CheckNull("test_types_sf", []string{"c41", "c1", "c2", "c3", "c4", + "c6", "c39", "c40", "id", "c9", "c11", "c12", "c13", "c14", "c15", "c16", "c17", "c18", + "c21", "c22", "c23", "c24", "c28", "c29", "c30", "c31", "c33", "c34", "c35", "c36", + "c37", "c38", "c7", "c8", "c32"}) + if err != nil { + fmt.Println("error %w", err) + } + // Make sure that there are no nulls + s.Equal(noNulls, true) + + env.AssertExpectations(s.T()) +} + +func (s *PeerFlowE2ETestSuiteSF) Test_Types_SF_Avro_CDC() { + env := s.NewTestWorkflowEnvironment() + e2e.RegisterWorkflowsAndActivities(env) + + srcTableName := s.attachSchemaSuffix("test_types_sf_avro_cdc") + dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_types_sf_avro_cdc") + + _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` + CREATE TABLE %s (id serial PRIMARY KEY,c1 BIGINT,c2 BIT,c3 VARBIT,c4 BOOLEAN, + c6 BYTEA,c7 CHARACTER,c8 
+ _, err := s.pool.Exec(context.Background(), fmt.Sprintf(`
+ CREATE TABLE %s (id serial PRIMARY KEY,c1 BIGINT,c2 BIT,c3 VARBIT,c4 BOOLEAN,
+ c6 BYTEA,c7 CHARACTER,c8 varchar,c9 CIDR,c11 DATE,c12 FLOAT,c13 DOUBLE PRECISION,
+ c14 INET,c15 INTEGER,c16 INTERVAL,c17 JSON,c18 JSONB,c21 MACADDR,c22 MONEY,
+ c23 NUMERIC,c24 OID,c28 REAL,c29 SMALLINT,c30 SMALLSERIAL,c31 SERIAL,c32 TEXT,
+ c33 TIMESTAMP,c34 TIMESTAMPTZ,c35 TIME, c36 TIMETZ,c37 TSQUERY,c38 TSVECTOR,
+ c39 TXID_SNAPSHOT,c40 UUID,c41 XML);
+ CREATE OR REPLACE FUNCTION random_bytea(bytea_length integer)
+ RETURNS bytea AS $body$
+ SELECT decode(string_agg(lpad(to_hex(width_bucket(random(), 0, 1, 256)-1),2,'0') ,''), 'hex')
+ FROM generate_series(1, $1);
+ $body$
+ LANGUAGE 'sql'
+ VOLATILE
+ SET search_path = 'pg_catalog';
+ `, srcTableName))
+ s.NoError(err)
+
+ connectionGen := e2e.FlowConnectionGenerationConfig{
+ FlowJobName: s.attachSuffix("test_types_sf_avro_cdc"),
+ TableNameMapping: map[string]string{srcTableName: dstTableName},
+ PostgresPort: e2e.PostgresPort,
+ Destination: s.sfHelper.Peer,
+ CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO,
+ }
+
+ flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs()
+ s.NoError(err)
+
+ limits := peerflow.PeerFlowLimits{
+ TotalSyncFlows: 1,
+ MaxBatchSize: 100,
+ }
+
+ // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup
+ // and then insert a row covering all the column types
+ go func() {
+ e2e.SetupPeerFlowStatusQuery(env, connectionGen)
+ /* test inserting various types */
+ _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`
+ INSERT INTO %s SELECT 2,2,b'1',b'101',
+ true,random_bytea(32),'s','test','1.1.10.2'::cidr,
+ CURRENT_DATE,1.23,1.234,'192.168.1.5'::inet,1,
+ '5 years 2 months 29 days 1 minute 2 seconds 200 milliseconds 20000 microseconds'::interval,
+ '{"sai":1}'::json,'{"sai":1}'::jsonb,'08:00:2b:01:02:03'::macaddr,
+ 1.2,1.23,4::oid,1.23,1,1,1,'test',now(),now(),now()::time,now()::timetz,
+ 'fat & rat'::tsquery,'a fat cat sat on a mat and ate a fat rat'::tsvector,
+ txid_current_snapshot(),
+ '66073c38-b8df-4bdb-bbca-1c97596b8940'::uuid,xmlcomment('hello');
+ `, srcTableName))
+ s.NoError(err)
+ fmt.Println("Executed an insert with all types")
+ }()
+
+ env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil)
+
+ // Verify workflow completes without error
+ s.True(env.IsWorkflowCompleted())
+ err = env.GetWorkflowError()
+
+ // allow only continue as new error
+ s.Error(err)
+ s.Contains(err.Error(), "continue as new")
+
+ noNulls, err := s.sfHelper.CheckNull("test_types_sf_avro_cdc", []string{"c41", "c1", "c2", "c3", "c4",
+ "c6", "c39", "c40", "id", "c9", "c11", "c12", "c13", "c14", "c15", "c16", "c17", "c18",
+ "c21", "c22", "c23", "c24", "c28", "c29", "c30", "c31", "c33", "c34", "c35", "c36",
+ "c37", "c38", "c7", "c8", "c32"})
+ if err != nil {
+ fmt.Printf("error: %v\n", err)
+ }
+ // Make sure that there are no nulls
+ s.True(noNulls)
+
+ env.AssertExpectations(s.T())
+}
+
+func (s *PeerFlowE2ETestSuiteSF) Test_Multi_Table_SF() {
+ env := s.NewTestWorkflowEnvironment()
+ e2e.RegisterWorkflowsAndActivities(env)
+
+ srcTable1Name := s.attachSchemaSuffix("test1_sf")
+ srcTable2Name := s.attachSchemaSuffix("test2_sf")
+ dstTable1Name := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test1_sf")
+ dstTable2Name := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test2_sf")
+
+ _, err := s.pool.Exec(context.Background(), fmt.Sprintf(`
+ CREATE TABLE %s (id serial primary key, c1 int, c2 text);
+ CREATE TABLE %s (id serial primary key, c1 int, c2 text);
+ `, srcTable1Name, srcTable2Name))
+ s.NoError(err)
+
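+ // One flow can mirror multiple tables at once: TableNameMapping below
+ // carries one source -> destination entry per table.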
+ connectionGen := e2e.FlowConnectionGenerationConfig{
+ FlowJobName: s.attachSuffix("test_multi_table"),
+ TableNameMapping: map[string]string{srcTable1Name: dstTable1Name, srcTable2Name: dstTable2Name},
+ PostgresPort: e2e.PostgresPort,
+ Destination: s.sfHelper.Peer,
+ }
+
+ flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs()
+ s.NoError(err)
+
+ limits := peerflow.PeerFlowLimits{
+ TotalSyncFlows: 1,
+ MaxBatchSize: 100,
+ }
+
+ // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup
+ // and then insert a row into each of the two source tables
+ go func() {
+ e2e.SetupPeerFlowStatusQuery(env, connectionGen)
+ /* inserting across multiple tables */
+ _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`
+ INSERT INTO %s (c1,c2) VALUES (1,'dummy_1');
+ INSERT INTO %s (c1,c2) VALUES (-1,'dummy_-1');
+ `, srcTable1Name, srcTable2Name))
+ s.NoError(err)
+ fmt.Println("Executed an insert on both source tables")
+ }()
+
+ env.ExecuteWorkflow(peerflow.PeerFlowWorkflowWithConfig, flowConnConfig, &limits, nil)
+
+ // Verify workflow completes without error
+ s.True(env.IsWorkflowCompleted())
+ err = env.GetWorkflowError()
+
+ // allow only continue as new error
+ s.Error(err)
+ s.Contains(err.Error(), "continue as new")
+
+ count1, err := s.sfHelper.CountRows("test1_sf")
+ s.NoError(err)
+ count2, err := s.sfHelper.CountRows("test2_sf")
+ s.NoError(err)
+
+ s.Equal(1, count1)
+ s.Equal(1, count2)
+
+ env.AssertExpectations(s.T())
+}
diff --git a/flow/e2e/snowflake/qrep_flow_sf_test.go b/flow/e2e/snowflake/qrep_flow_sf_test.go
new file mode 100644
index 000000000..4d42adbcc
--- /dev/null
+++ b/flow/e2e/snowflake/qrep_flow_sf_test.go
@@ -0,0 +1,232 @@
+package e2e_snowflake
+
+import (
+ "context"
+ "fmt"
+
+ connpostgres "github.com/PeerDB-io/peer-flow/connectors/postgres"
+ "github.com/PeerDB-io/peer-flow/e2e"
+ "github.com/PeerDB-io/peer-flow/generated/protos"
+ "github.com/google/uuid"
+ "github.com/stretchr/testify/require"
+)
+
+func (s *PeerFlowE2ETestSuiteSF) setupSourceTable(tableName string, rowCount int) {
+ err := e2e.CreateSourceTableQRep(s.pool, snowflakeSuffix, tableName)
+ s.NoError(err)
+ err = e2e.PopulateSourceTable(s.pool, snowflakeSuffix, tableName, rowCount)
+ s.NoError(err)
+}
+
+func (s *PeerFlowE2ETestSuiteSF) setupSFDestinationTable(dstTable string) {
+ schema := e2e.GetOwnersSchema()
+ err := s.sfHelper.CreateTable(dstTable, schema)
+
+ // fail if table creation fails
+ if err != nil {
+ s.FailNow("unable to create table on snowflake", err)
+ }
+
+ fmt.Printf("created table on snowflake: %s.%s\n", s.sfHelper.testSchemaName, dstTable)
+}
+
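+// compareTableContentsSF takes a caseSensitive flag because Snowflake folds
+// unquoted identifiers to UPPERCASE: ordering by "id" (quoted) only works
+// when the destination column was created with a lowercase name.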
%v\n", s.sfHelper.testSchemaName, dstTable, err) +} + +func (s *PeerFlowE2ETestSuiteSF) compareTableContentsSF(tableName string, selector string, caseSensitive bool) { + // read rows from source table + pgQueryExecutor := connpostgres.NewQRepQueryExecutor(s.pool, context.Background(), "testflow", "testpart") + pgQueryExecutor.SetTestEnv(true) + pgRows, err := pgQueryExecutor.ExecuteAndProcessQuery( + fmt.Sprintf("SELECT %s FROM e2e_test_%s.%s ORDER BY id", selector, snowflakeSuffix, tableName), + ) + require.NoError(s.T(), err) + + // read rows from destination table + qualifiedTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, tableName) + var sfSelQuery string + if caseSensitive { + sfSelQuery = fmt.Sprintf(`SELECT %s FROM %s ORDER BY "id"`, selector, qualifiedTableName) + } else { + sfSelQuery = fmt.Sprintf(`SELECT %s FROM %s ORDER BY id`, selector, qualifiedTableName) + } + fmt.Printf("running query on snowflake: %s\n", sfSelQuery) + + // sleep for 1 min for debugging + // time.Sleep(1 * time.Minute) + + sfRows, err := s.sfHelper.ExecuteAndProcessQuery(sfSelQuery) + require.NoError(s.T(), err) + + s.True(pgRows.Equals(sfRows), "rows from source and destination tables are not equal") +} + +func (s *PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF() { + env := s.NewTestWorkflowEnvironment() + e2e.RegisterWorkflowsAndActivities(env) + + numRows := 10 + + tblName := "test_qrep_flow_avro_sf" + s.setupSourceTable(tblName, numRows) + s.setupSFDestinationTable(tblName) + + dstSchemaQualified := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, tblName) + + query := fmt.Sprintf("SELECT * FROM e2e_test_%s.%s WHERE updated_at BETWEEN {{.start}} AND {{.end}}", + snowflakeSuffix, tblName) + + qrepConfig, err := e2e.CreateQRepWorkflowConfig( + "test_qrep_flow_avro_sf", + fmt.Sprintf("e2e_test_%s.%s", snowflakeSuffix, tblName), + dstSchemaQualified, + query, + protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, + s.sfHelper.Peer, + "", + ) + s.NoError(err) + + e2e.RunQrepFlowWorkflow(env, qrepConfig) + + // Verify workflow completes without error + s.True(env.IsWorkflowCompleted()) + + // assert that error contains "invalid connection configs" + err = env.GetWorkflowError() + s.NoError(err) + + sel := e2e.GetOwnersSelectorString() + s.compareTableContentsSF(tblName, sel, true) + + env.AssertExpectations(s.T()) +} + +func (s *PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_Upsert_Simple() { + env := s.NewTestWorkflowEnvironment() + e2e.RegisterWorkflowsAndActivities(env) + + numRows := 10 + + tblName := "test_qrep_flow_avro_sf_ups" + s.setupSourceTable(tblName, numRows) + s.setupSFDestinationTable(tblName) + + dstSchemaQualified := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, tblName) + + query := fmt.Sprintf("SELECT * FROM e2e_test_%s.%s WHERE updated_at >= {{.start}} AND updated_at < {{.end}}", + snowflakeSuffix, tblName) + + qrepConfig, err := e2e.CreateQRepWorkflowConfig( + "test_qrep_flow_avro_sf", + fmt.Sprintf("e2e_test_%s.%s", snowflakeSuffix, tblName), + dstSchemaQualified, + query, + protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, + s.sfHelper.Peer, + "", + ) + qrepConfig.WriteMode = &protos.QRepWriteMode{ + WriteType: protos.QRepWriteType_QREP_WRITE_MODE_UPSERT, + UpsertKeyColumns: []string{"id"}, + } + s.NoError(err) + + e2e.RunQrepFlowWorkflow(env, qrepConfig) + + // Verify workflow completes without error + s.True(env.IsWorkflowCompleted()) + + // assert that error contains "invalid connection configs" + err = env.GetWorkflowError() + s.NoError(err) + + sel := 
+
+ e2e.RunQrepFlowWorkflow(env, qrepConfig)
+
+ // Verify workflow completes without error
+ s.True(env.IsWorkflowCompleted())
+
+ // and that no error was returned
+ err = env.GetWorkflowError()
+ s.NoError(err)
+
+ sel := e2e.GetOwnersSelectorString()
+ s.compareTableContentsSF(tblName, sel, true)
+
+ env.AssertExpectations(s.T())
+}
+
+func (s *PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_S3() {
+ env := s.NewTestWorkflowEnvironment()
+ e2e.RegisterWorkflowsAndActivities(env)
+
+ numRows := 10
+
+ tblName := "test_qrep_flow_avro_sf_s3"
+ s.setupSourceTable(tblName, numRows)
+ s.setupSFDestinationTable(tblName)
+
+ dstSchemaQualified := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, tblName)
+
+ query := fmt.Sprintf("SELECT * FROM e2e_test_%s.%s WHERE updated_at >= {{.start}} AND updated_at < {{.end}}",
+ snowflakeSuffix, tblName)
+
+ qrepConfig, err := e2e.CreateQRepWorkflowConfig(
+ "test_qrep_flow_avro_sf",
+ s.attachSchemaSuffix(tblName),
+ dstSchemaQualified,
+ query,
+ protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO,
+ s.sfHelper.Peer,
+ "",
+ )
+ s.NoError(err)
+ qrepConfig.StagingPath = fmt.Sprintf("s3://peerdb-test-bucket/avro/%s", uuid.New())
+
+ e2e.RunQrepFlowWorkflow(env, qrepConfig)
+
+ // Verify workflow completes without error
+ s.True(env.IsWorkflowCompleted())
+
+ // and that no error was returned
+ err = env.GetWorkflowError()
+ s.NoError(err)
+
+ sel := e2e.GetOwnersSelectorString()
+ s.compareTableContentsSF(tblName, sel, true)
+
+ env.AssertExpectations(s.T())
+}
+
+func (s *PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_S3_Integration() {
+ env := s.NewTestWorkflowEnvironment()
+ e2e.RegisterWorkflowsAndActivities(env)
+
+ numRows := 10
+
+ tblName := "test_qrep_flow_avro_sf_s3_int"
+ s.setupSourceTable(tblName, numRows)
+ s.setupSFDestinationTable(tblName)
+
+ dstSchemaQualified := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, tblName)
+
+ query := fmt.Sprintf("SELECT * FROM e2e_test_%s.%s WHERE updated_at >= {{.start}} AND updated_at < {{.end}}",
+ snowflakeSuffix, tblName)
+
+ sfPeer := s.sfHelper.Peer
+ sfPeer.GetSnowflakeConfig().S3Integration = "peerdb_s3_integration"
+
+ qrepConfig, err := e2e.CreateQRepWorkflowConfig(
+ "test_qrep_flow_avro_sf_int",
+ s.attachSchemaSuffix(tblName),
+ dstSchemaQualified,
+ query,
+ protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO,
+ sfPeer,
+ "",
+ )
+ s.NoError(err)
+ qrepConfig.StagingPath = fmt.Sprintf("s3://peerdb-test-bucket/avro/%s", uuid.New())
+
+ e2e.RunQrepFlowWorkflow(env, qrepConfig)
+
+ // Verify workflow completes without error
+ s.True(env.IsWorkflowCompleted())
+
+ // and that no error was returned
+ err = env.GetWorkflowError()
+ s.NoError(err)
+
+ sel := e2e.GetOwnersSelectorString()
+ s.compareTableContentsSF(tblName, sel, true)
+
+ env.AssertExpectations(s.T())
+}
diff --git a/flow/e2e/snowflake/snowflake_helper.go b/flow/e2e/snowflake/snowflake_helper.go
new file mode 100644
index 000000000..2a39daf93
--- /dev/null
+++ b/flow/e2e/snowflake/snowflake_helper.go
@@ -0,0 +1,131 @@
+package e2e_snowflake
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "os"
+
+ connsnowflake "github.com/PeerDB-io/peer-flow/connectors/snowflake"
+ "github.com/PeerDB-io/peer-flow/e2e"
+ "github.com/PeerDB-io/peer-flow/generated/protos"
+ "github.com/PeerDB-io/peer-flow/model"
+ util "github.com/PeerDB-io/peer-flow/utils"
+)
+
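+// SnowflakeTestHelper keeps two clients: an admin client on the configured
+// database that creates and drops a per-run database (e2e_test_<runID>), and
+// a test client scoped to that database, which is what lets concurrent CI
+// runs share one Snowflake instance without clashing.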
+type SnowflakeTestHelper struct {
+ // Config is the Snowflake config.
+ Config *protos.SnowflakeConfig
+ // Peer is the Snowflake peer definition.
+ Peer *protos.Peer
+ // connection to another database, to manage the test database
+ adminClient *connsnowflake.SnowflakeClient
+ // connection to the test database
+ testClient *connsnowflake.SnowflakeClient
+ // testSchemaName is the schema to use for testing.
+ testSchemaName string
+ // testDatabaseName is the database used for testing.
+ testDatabaseName string
+}
+
+func NewSnowflakeTestHelper() (*SnowflakeTestHelper, error) {
+ jsonPath := os.Getenv("TEST_SF_CREDS")
+ if jsonPath == "" {
+ return nil, fmt.Errorf("TEST_SF_CREDS env var not set")
+ }
+
+ content, err := e2e.ReadFileToBytes(jsonPath)
+ if err != nil {
+ return nil, fmt.Errorf("failed to read file: %w", err)
+ }
+
+ var config protos.SnowflakeConfig
+ err = json.Unmarshal(content, &config)
+ if err != nil {
+ return nil, fmt.Errorf("failed to unmarshal json: %w", err)
+ }
+
+ peer := generateSFPeer(&config)
+ runID, err := util.RandomUInt64()
+ if err != nil {
+ return nil, fmt.Errorf("failed to generate random uint64: %w", err)
+ }
+
+ testDatabaseName := fmt.Sprintf("e2e_test_%d", runID)
+
+ adminClient, err := connsnowflake.NewSnowflakeClient(context.Background(), &config)
+ if err != nil {
+ return nil, fmt.Errorf("failed to create Snowflake client: %w", err)
+ }
+ err = adminClient.ExecuteQuery(fmt.Sprintf("CREATE DATABASE %s", testDatabaseName))
+ if err != nil {
+ return nil, fmt.Errorf("failed to create Snowflake test database: %w", err)
+ }
+
+ config.Database = testDatabaseName
+ testClient, err := connsnowflake.NewSnowflakeClient(context.Background(), &config)
+ if err != nil {
+ return nil, fmt.Errorf("failed to create Snowflake client: %w", err)
+ }
+
+ return &SnowflakeTestHelper{
+ Config: &config,
+ Peer: peer,
+ adminClient: adminClient,
+ testClient: testClient,
+ testSchemaName: "PUBLIC",
+ testDatabaseName: testDatabaseName,
+ }, nil
+}
+
+func generateSFPeer(snowflakeConfig *protos.SnowflakeConfig) *protos.Peer {
+ ret := &protos.Peer{}
+ ret.Name = "test_sf_peer"
+ ret.Type = protos.DBType_SNOWFLAKE
+
+ ret.Config = &protos.Peer_SnowflakeConfig{
+ SnowflakeConfig: snowflakeConfig,
+ }
+
+ return ret
+}
+
+// Cleanup drops the database.
+func (s *SnowflakeTestHelper) Cleanup() error {
+ err := s.testClient.Close()
+ if err != nil {
+ return err
+ }
+ err = s.adminClient.ExecuteQuery(fmt.Sprintf("DROP DATABASE %s", s.testDatabaseName))
+ if err != nil {
+ return err
+ }
+ return s.adminClient.Close()
+}
+
+// RunCommand runs the given command.
+func (s *SnowflakeTestHelper) RunCommand(command string) error {
+ return s.testClient.ExecuteQuery(command)
+}
+
+// CountRows returns the number of rows in the given table.
+func (s *SnowflakeTestHelper) CountRows(tableName string) (int, error) { + res, err := s.testClient.CountRows(s.testSchemaName, tableName) + if err != nil { + return 0, err + } + + return int(res), nil +} + +func (s *SnowflakeTestHelper) CheckNull(tableName string, colNames []string) (bool, error) { + return s.testClient.CheckNull(s.testSchemaName, tableName, colNames) +} + +func (s *SnowflakeTestHelper) ExecuteAndProcessQuery(query string) (*model.QRecordBatch, error) { + return s.testClient.ExecuteAndProcessQuery(query) +} + +func (s *SnowflakeTestHelper) CreateTable(tableName string, schema *model.QRecordSchema) error { + return s.testClient.CreateTable(schema, s.testSchemaName, tableName) +} diff --git a/flow/e2e/snowflake_helper.go b/flow/e2e/snowflake_helper.go deleted file mode 100644 index 090ddeccb..000000000 --- a/flow/e2e/snowflake_helper.go +++ /dev/null @@ -1,104 +0,0 @@ -package e2e - -import ( - "context" - "encoding/json" - "fmt" - "os" - - connsnowflake "github.com/PeerDB-io/peer-flow/connectors/snowflake" - "github.com/PeerDB-io/peer-flow/generated/protos" - "github.com/PeerDB-io/peer-flow/model" -) - -type SnowflakeTestHelper struct { - // config is the Snowflake config. - Config *protos.SnowflakeConfig - // peer struct holder Snowflake - Peer *protos.Peer - // connection to Snowflake - client *connsnowflake.SnowflakeClient - // testSchemaName is the schema to use for testing. - testSchemaName string -} - -func NewSnowflakeTestHelper(testSchemaName string) (*SnowflakeTestHelper, error) { - jsonPath := os.Getenv("TEST_SF_CREDS") - if jsonPath == "" { - return nil, fmt.Errorf("TEST_SF_CREDS env var not set") - } - - content, err := readFileToBytes(jsonPath) - if err != nil { - return nil, fmt.Errorf("failed to read file: %w", err) - } - - var config protos.SnowflakeConfig - err = json.Unmarshal(content, &config) - if err != nil { - return nil, fmt.Errorf("failed to unmarshal json: %w", err) - } - - peer := generateSFPeer(&config) - - client, err := connsnowflake.NewSnowflakeClient(context.Background(), &config) - if err != nil { - return nil, fmt.Errorf("failed to create Snowflake client: %w", err) - } - - return &SnowflakeTestHelper{ - Config: &config, - Peer: peer, - client: client, - testSchemaName: testSchemaName, - }, nil -} - -func generateSFPeer(snowflakeConfig *protos.SnowflakeConfig) *protos.Peer { - ret := &protos.Peer{} - ret.Name = "test_sf_peer" - ret.Type = protos.DBType_SNOWFLAKE - - ret.Config = &protos.Peer_SnowflakeConfig{ - SnowflakeConfig: snowflakeConfig, - } - - return ret -} - -// RecreateSchema recreates the schema, i.e., drops it if exists and creates it again. -func (s *SnowflakeTestHelper) RecreateSchema() error { - return s.client.RecreateSchema(s.testSchemaName) -} - -// DropSchema drops the schema. -func (s *SnowflakeTestHelper) DropSchema() error { - return s.client.DropSchema(s.testSchemaName) -} - -// RunCommand runs the given command. -func (s *SnowflakeTestHelper) RunCommand(command string) error { - return s.client.ExecuteQuery(command) -} - -// CountRows(tableName) returns the number of rows in the given table. 
-func (s *SnowflakeTestHelper) CountRows(tableName string) (int, error) {
-	res, err := s.client.CountRows(s.testSchemaName, tableName)
-	if err != nil {
-		return 0, err
-	}
-
-	return int(res), nil
-}
-
-func (s *SnowflakeTestHelper) CheckNull(tableName string, colNames []string) (bool, error) {
-	return s.client.CheckNull(s.testSchemaName, tableName, colNames)
-}
-
-func (s *SnowflakeTestHelper) ExecuteAndProcessQuery(query string) (*model.QRecordBatch, error) {
-	return s.client.ExecuteAndProcessQuery(query)
-}
-
-func (s *SnowflakeTestHelper) CreateTable(tableName string, schema *model.QRecordSchema) error {
-	return s.client.CreateTable(schema, s.testSchemaName, tableName)
-}
diff --git a/flow/e2e/qrep_flow_sqlserver_test.go b/flow/e2e/sqlserver/qrep_flow_sqlserver_test.go
similarity index 53%
rename from flow/e2e/qrep_flow_sqlserver_test.go
rename to flow/e2e/sqlserver/qrep_flow_sqlserver_test.go
index 80fe7e1e6..4a072f201 100644
--- a/flow/e2e/qrep_flow_sqlserver_test.go
+++ b/flow/e2e/sqlserver/qrep_flow_sqlserver_test.go
@@ -1,23 +1,92 @@
-package e2e
+package e2e_sqlserver
 
 import (
 	"context"
 	"fmt"
+	"os"
+	"testing"
 	"time"
 
+	"github.com/PeerDB-io/peer-flow/e2e"
 	"github.com/PeerDB-io/peer-flow/generated/protos"
 	"github.com/PeerDB-io/peer-flow/model"
 	"github.com/PeerDB-io/peer-flow/model/qvalue"
+	"github.com/jackc/pgx/v5/pgxpool"
+	"github.com/joho/godotenv"
+	log "github.com/sirupsen/logrus"
 	"github.com/stretchr/testify/require"
+	"github.com/stretchr/testify/suite"
+	"go.temporal.io/sdk/testsuite"
 )
 
-func (s *E2EPeerFlowTestSuite) setupSQLServerTable(tableName string) {
+const sqlserverSuffix = "sqlserver"
+
+type PeerFlowE2ETestSuiteSQLServer struct {
+	suite.Suite
+	testsuite.WorkflowTestSuite
+
+	pool       *pgxpool.Pool
+	sqlsHelper *SQLServerHelper
+}
+
+func TestPeerFlowE2ETestSuiteSQLServer(t *testing.T) {
+	suite.Run(t, new(PeerFlowE2ETestSuiteSQLServer))
+}
+
+// setupSQLServer sets up the SQL Server connection when ENABLE_SQLSERVER_TESTS is "true".
+func (s *PeerFlowE2ETestSuiteSQLServer) setupSQLServer() {
+	env := os.Getenv("ENABLE_SQLSERVER_TESTS")
+	if env != "true" {
+		s.sqlsHelper = nil
+		return
+	}
+
+	sqlsHelper, err := NewSQLServerHelper("test_sqlserver_peer")
+	require.NoError(s.T(), err)
+	s.sqlsHelper = sqlsHelper
+}
+
+func (s *PeerFlowE2ETestSuiteSQLServer) SetupSuite() {
+	err := godotenv.Load()
+	if err != nil {
+		// it's okay if the .env file is not present
+		// we will use the default values
+		log.Infof("Unable to load .env file, using default values from env")
+	}
+
+	log.SetReportCaller(true)
+
+	pool, err := e2e.SetupPostgres(sqlserverSuffix)
+	if err != nil {
+		s.Fail("failed to setup postgres", err)
+	}
+	s.pool = pool
+
+	s.setupSQLServer()
+}
+
+// Implement TearDownAllSuite interface to tear down the test suite
+func (s *PeerFlowE2ETestSuiteSQLServer) TearDownSuite() {
+	err := e2e.TearDownPostgres(s.pool, sqlserverSuffix)
+	if err != nil {
+		s.Fail("failed to drop Postgres schema", err)
+	}
+
+	if s.sqlsHelper != nil {
+		err = s.sqlsHelper.CleanUp()
+		if err != nil {
+			s.Fail("failed to clean up sqlserver", err)
+		}
+	}
+}
+
+func (s *PeerFlowE2ETestSuiteSQLServer) setupSQLServerTable(tableName string) {
 	schema := getSimpleTableSchema()
 	err := s.sqlsHelper.CreateTable(schema, tableName)
 	require.NoError(s.T(), err)
 }
 
-func (s *E2EPeerFlowTestSuite) insertRowsIntoSQLServerTable(tableName string, numRows int) {
+func (s *PeerFlowE2ETestSuiteSQLServer) insertRowsIntoSQLServerTable(tableName string, numRows int) {
 	schemaQualified := fmt.Sprintf("%s.%s", s.sqlsHelper.SchemaName, tableName)
 	for i := 0; i < numRows; i++ {
params := make(map[string]interface{}) @@ -37,16 +106,15 @@ func (s *E2EPeerFlowTestSuite) insertRowsIntoSQLServerTable(tableName string, nu } } -func (s *E2EPeerFlowTestSuite) setupPGDestinationTable(schemaName, tableName string) { +func (s *PeerFlowE2ETestSuiteSQLServer) setupPGDestinationTable(tableName string) { ctx := context.Background() - _, err := s.pool.Exec(ctx, fmt.Sprintf("CREATE SCHEMA IF NOT EXISTS %s", schemaName)) - require.NoError(s.T(), err) - _, err = s.pool.Exec(ctx, fmt.Sprintf("DROP TABLE IF EXISTS %s.%s", schemaName, tableName)) + _, err := s.pool.Exec(ctx, fmt.Sprintf("DROP TABLE IF EXISTS e2e_test_%s.%s", sqlserverSuffix, tableName)) require.NoError(s.T(), err) - //nolint:lll - _, err = s.pool.Exec(ctx, fmt.Sprintf("CREATE TABLE %s.%s (id TEXT, card_id TEXT, v_from TIMESTAMP, price NUMERIC, status INT)", schemaName, tableName)) + _, err = s.pool.Exec(ctx, + fmt.Sprintf("CREATE TABLE e2e_test_%s.%s (id TEXT, card_id TEXT, v_from TIMESTAMP, price NUMERIC, status INT)", + sqlserverSuffix, tableName)) require.NoError(s.T(), err) } @@ -62,13 +130,13 @@ func getSimpleTableSchema() *model.QRecordSchema { } } -func (s *E2EPeerFlowTestSuite) Test_Complete_QRep_Flow_SqlServer_Append() { +func (s *PeerFlowE2ETestSuiteSQLServer) Test_Complete_QRep_Flow_SqlServer_Append() { if s.sqlsHelper == nil { s.T().Skip("Skipping SQL Server test") } env := s.NewTestWorkflowEnvironment() - registerWorkflowsAndActivities(env) + e2e.RegisterWorkflowsAndActivities(env) numRows := 10 tblName := "test_qrep_flow_avro_ss_append" @@ -77,13 +145,13 @@ func (s *E2EPeerFlowTestSuite) Test_Complete_QRep_Flow_SqlServer_Append() { s.setupSQLServerTable(tblName) s.insertRowsIntoSQLServerTable(tblName, numRows) - s.setupPGDestinationTable(s.sqlsHelper.SchemaName, tblName) - dstTableName := fmt.Sprintf("%s.%s", s.sqlsHelper.SchemaName, tblName) + s.setupPGDestinationTable(tblName) + dstTableName := fmt.Sprintf("e2e_test_%s.%s", sqlserverSuffix, tblName) - //nolint:lll - query := fmt.Sprintf("SELECT * FROM %s.%s WHERE v_from BETWEEN {{.start}} AND {{.end}}", s.sqlsHelper.SchemaName, tblName) + query := fmt.Sprintf("SELECT * FROM %s.%s WHERE v_from BETWEEN {{.start}} AND {{.end}}", + s.sqlsHelper.SchemaName, tblName) - postgresPeer := GeneratePostgresPeer(postgresPort) + postgresPeer := e2e.GeneratePostgresPeer(e2e.PostgresPort) qrepConfig := &protos.QRepConfig{ FlowJobName: tblName, @@ -100,7 +168,7 @@ func (s *E2EPeerFlowTestSuite) Test_Complete_QRep_Flow_SqlServer_Append() { WaitBetweenBatchesSeconds: 5, } - runQrepFlowWorkflow(env, qrepConfig) + e2e.RunQrepFlowWorkflow(env, qrepConfig) // Verify workflow completes without error s.True(env.IsWorkflowCompleted()) diff --git a/flow/e2e/sqlserver_helper.go b/flow/e2e/sqlserver/sqlserver_helper.go similarity index 99% rename from flow/e2e/sqlserver_helper.go rename to flow/e2e/sqlserver/sqlserver_helper.go index 38933b35e..1be94fc83 100644 --- a/flow/e2e/sqlserver_helper.go +++ b/flow/e2e/sqlserver/sqlserver_helper.go @@ -1,4 +1,4 @@ -package e2e +package e2e_sqlserver import ( "context" diff --git a/flow/e2e/test_utils.go b/flow/e2e/test_utils.go index 293af9095..761e0cbf5 100644 --- a/flow/e2e/test_utils.go +++ b/flow/e2e/test_utils.go @@ -1,13 +1,27 @@ package e2e import ( + "context" + "encoding/json" "fmt" "io" "os" + "strings" + "time" + + "github.com/PeerDB-io/peer-flow/activities" + "github.com/PeerDB-io/peer-flow/generated/protos" + "github.com/PeerDB-io/peer-flow/model" + "github.com/PeerDB-io/peer-flow/model/qvalue" + peerflow 
"github.com/PeerDB-io/peer-flow/workflows" + "github.com/google/uuid" + "github.com/jackc/pgx/v5/pgxpool" + log "github.com/sirupsen/logrus" + "go.temporal.io/sdk/testsuite" ) // readFileToBytes reads a file to a byte array. -func readFileToBytes(path string) ([]byte, error) { +func ReadFileToBytes(path string) ([]byte, error) { var ret []byte f, err := os.Open(path) @@ -24,3 +38,277 @@ func readFileToBytes(path string) ([]byte, error) { return ret, nil } + +func RegisterWorkflowsAndActivities(env *testsuite.TestWorkflowEnvironment) { + // set a 300 second timeout for the workflow to execute a few runs. + env.SetTestTimeout(300 * time.Second) + + env.RegisterWorkflow(peerflow.PeerFlowWorkflow) + env.RegisterWorkflow(peerflow.PeerFlowWorkflowWithConfig) + env.RegisterWorkflow(peerflow.SyncFlowWorkflow) + env.RegisterWorkflow(peerflow.SetupFlowWorkflow) + env.RegisterWorkflow(peerflow.SnapshotFlowWorkflow) + env.RegisterWorkflow(peerflow.NormalizeFlowWorkflow) + env.RegisterWorkflow(peerflow.QRepFlowWorkflow) + env.RegisterWorkflow(peerflow.QRepPartitionWorkflow) + env.RegisterActivity(&activities.FetchConfigActivity{}) + env.RegisterActivity(&activities.FlowableActivity{}) + env.RegisterActivity(&activities.SnapshotActivity{}) +} + +func SetupPeerFlowStatusQuery(env *testsuite.TestWorkflowEnvironment, + connectionGen FlowConnectionGenerationConfig) { + // wait for PeerFlowStatusQuery to finish setup + // sleep for 5 second to allow the workflow to start + time.Sleep(5 * time.Second) + for { + response, err := env.QueryWorkflow( + peerflow.PeerFlowStatusQuery, + connectionGen.FlowJobName, + ) + if err == nil { + var state peerflow.PeerFlowState + err = response.Get(&state) + if err != nil { + log.Errorln(err) + } + + if state.SetupComplete { + fmt.Println("query indicates setup is complete") + break + } + } else { + // log the error for informational purposes + log.Errorln(err) + } + time.Sleep(1 * time.Second) + } +} + +func CreateSourceTableQRep(pool *pgxpool.Pool, suffix string, tableName string) error { + tblFields := []string{ + "id UUID NOT NULL PRIMARY KEY", + "card_id UUID", + `"from" TIMESTAMP NOT NULL`, + "price NUMERIC", + "created_at TIMESTAMP NOT NULL", + "updated_at TIMESTAMP NOT NULL", + "transaction_hash BYTEA", + "ownerable_type VARCHAR", + "ownerable_id UUID", + "user_nonce INTEGER", + "transfer_type INTEGER DEFAULT 0 NOT NULL", + "blockchain INTEGER NOT NULL", + "deal_type VARCHAR", + "deal_id UUID", + "ethereum_transaction_id UUID", + "ignore_price BOOLEAN DEFAULT false", + "card_eth_value DOUBLE PRECISION", + "paid_eth_price DOUBLE PRECISION", + "card_bought_notified BOOLEAN DEFAULT false NOT NULL", + "address NUMERIC", + "account_id UUID", + "asset_id NUMERIC NOT NULL", + "status INTEGER", + "transaction_id UUID", + "settled_at TIMESTAMP", + "reference_id VARCHAR", + "settle_at TIMESTAMP", + "settlement_delay_reason INTEGER", + "f1 text[]", + "f2 bigint[]", + "f3 int[]", + "f4 varchar[]", + "f5 jsonb", + "f6 jsonb", + "f7 jsonb", + "f8 smallint", + } + + tblFieldStr := strings.Join(tblFields, ",") + + _, err := pool.Exec(context.Background(), fmt.Sprintf(` + CREATE TABLE e2e_test_%s.%s ( + %s + );`, suffix, tableName, tblFieldStr)) + if err != nil { + return err + } + + fmt.Printf("created table on postgres: e2e_test_%s.%s\n", suffix, tableName) + return nil +} + +func generate20MBJson() ([]byte, error) { + xn := make(map[string]interface{}) + for i := 0; i < 215000; i++ { + xn[uuid.New().String()] = uuid.New().String() + } + + v, err := json.Marshal(xn) + if err != nil 
{ + return nil, err + } + + return v, nil +} + +func PopulateSourceTable(pool *pgxpool.Pool, suffix string, tableName string, rowCount int) error { + var ids []string + var rows []string + for i := 0; i < rowCount-1; i++ { + id := uuid.New().String() + ids = append(ids, id) + row := fmt.Sprintf(` + ( + '%s', '%s', CURRENT_TIMESTAMP, 3.86487206688919, CURRENT_TIMESTAMP, + CURRENT_TIMESTAMP, E'\\\\xDEADBEEF', 'type1', '%s', + 1, 0, 1, 'dealType1', + '%s', '%s', false, 1.2345, + 1.2345, false, 12345, '%s', + 12345, 1, '%s', CURRENT_TIMESTAMP, 'refID', + CURRENT_TIMESTAMP, 1, ARRAY['text1', 'text2'], ARRAY[123, 456], ARRAY[789, 012], + ARRAY['varchar1', 'varchar2'], '{"key": 8.5}', + '[{"key1": "value1", "key2": "value2", "key3": "value3"}]', + '{"key": "value"}', 15 + )`, + id, uuid.New().String(), uuid.New().String(), + uuid.New().String(), uuid.New().String(), uuid.New().String(), uuid.New().String()) + rows = append(rows, row) + } + + _, err := pool.Exec(context.Background(), fmt.Sprintf(` + INSERT INTO e2e_test_%s.%s ( + id, card_id, "from", price, created_at, + updated_at, transaction_hash, ownerable_type, ownerable_id, + user_nonce, transfer_type, blockchain, deal_type, + deal_id, ethereum_transaction_id, ignore_price, card_eth_value, + paid_eth_price, card_bought_notified, address, account_id, + asset_id, status, transaction_id, settled_at, reference_id, + settle_at, settlement_delay_reason, f1, f2, f3, f4, f5, f6, f7, f8 + ) VALUES %s; + `, suffix, tableName, strings.Join(rows, ","))) + if err != nil { + return err + } + + // add a row where all the nullable fields are null + _, err = pool.Exec(context.Background(), fmt.Sprintf(` + INSERT INTO e2e_test_%s.%s ( + id, "from", created_at, updated_at, + transfer_type, blockchain, card_bought_notified, asset_id + ) VALUES ( + '%s', CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, + 0, 1, false, 12345 + ); + `, suffix, tableName, uuid.New().String())) + if err != nil { + return err + } + + // generate a 20 MB json and update id[0]'s col f5 to it + v, err := generate20MBJson() + if err != nil { + return err + } + _, err = pool.Exec(context.Background(), fmt.Sprintf(` + UPDATE e2e_test_%s.%s SET f5 = $1 WHERE id = $2; + `, suffix, tableName), v, ids[0]) + if err != nil { + return err + } + + return nil +} + +func CreateQRepWorkflowConfig( + flowJobName string, + sourceTable string, + dstTable string, + query string, + syncMode protos.QRepSyncMode, + dest *protos.Peer, + stagingPath string, +) (*protos.QRepConfig, error) { + connectionGen := QRepFlowConnectionGenerationConfig{ + FlowJobName: flowJobName, + WatermarkTable: sourceTable, + DestinationTableIdentifier: dstTable, + PostgresPort: PostgresPort, + Destination: dest, + StagingPath: stagingPath, + } + + watermark := "updated_at" + + qrepConfig, err := connectionGen.GenerateQRepConfig(query, watermark, syncMode) + if err != nil { + return nil, err + } + + qrepConfig.InitialCopyOnly = true + + return qrepConfig, nil +} + +func RunQrepFlowWorkflow(env *testsuite.TestWorkflowEnvironment, config *protos.QRepConfig) { + lastPartition := &protos.QRepPartition{ + PartitionId: "not-applicable-partition", + Range: nil, + } + numPartitionsProcessed := 0 + env.ExecuteWorkflow(peerflow.QRepFlowWorkflow, config, lastPartition, numPartitionsProcessed) +} + +func GetOwnersSchema() *model.QRecordSchema { + return &model.QRecordSchema{ + Fields: []*model.QField{ + {Name: "id", Type: qvalue.QValueKindString, Nullable: true}, + {Name: "card_id", Type: qvalue.QValueKindString, Nullable: true}, + 
{Name: "from", Type: qvalue.QValueKindTimestamp, Nullable: true}, + {Name: "price", Type: qvalue.QValueKindNumeric, Nullable: true}, + {Name: "created_at", Type: qvalue.QValueKindTimestamp, Nullable: true}, + {Name: "updated_at", Type: qvalue.QValueKindTimestamp, Nullable: true}, + {Name: "transaction_hash", Type: qvalue.QValueKindBytes, Nullable: true}, + {Name: "ownerable_type", Type: qvalue.QValueKindString, Nullable: true}, + {Name: "ownerable_id", Type: qvalue.QValueKindString, Nullable: true}, + {Name: "user_nonce", Type: qvalue.QValueKindInt64, Nullable: true}, + {Name: "transfer_type", Type: qvalue.QValueKindInt64, Nullable: true}, + {Name: "blockchain", Type: qvalue.QValueKindInt64, Nullable: true}, + {Name: "deal_type", Type: qvalue.QValueKindString, Nullable: true}, + {Name: "deal_id", Type: qvalue.QValueKindString, Nullable: true}, + {Name: "ethereum_transaction_id", Type: qvalue.QValueKindString, Nullable: true}, + {Name: "ignore_price", Type: qvalue.QValueKindBoolean, Nullable: true}, + {Name: "card_eth_value", Type: qvalue.QValueKindFloat64, Nullable: true}, + {Name: "paid_eth_price", Type: qvalue.QValueKindFloat64, Nullable: true}, + {Name: "card_bought_notified", Type: qvalue.QValueKindBoolean, Nullable: true}, + {Name: "address", Type: qvalue.QValueKindNumeric, Nullable: true}, + {Name: "account_id", Type: qvalue.QValueKindString, Nullable: true}, + {Name: "asset_id", Type: qvalue.QValueKindNumeric, Nullable: true}, + {Name: "status", Type: qvalue.QValueKindInt64, Nullable: true}, + {Name: "transaction_id", Type: qvalue.QValueKindString, Nullable: true}, + {Name: "settled_at", Type: qvalue.QValueKindTimestamp, Nullable: true}, + {Name: "reference_id", Type: qvalue.QValueKindString, Nullable: true}, + {Name: "settle_at", Type: qvalue.QValueKindTimestamp, Nullable: true}, + {Name: "settlement_delay_reason", Type: qvalue.QValueKindInt64, Nullable: true}, + {Name: "f1", Type: qvalue.QValueKindArrayString, Nullable: true}, + {Name: "f2", Type: qvalue.QValueKindArrayInt64, Nullable: true}, + {Name: "f3", Type: qvalue.QValueKindArrayInt32, Nullable: true}, + {Name: "f4", Type: qvalue.QValueKindArrayString, Nullable: true}, + {Name: "f5", Type: qvalue.QValueKindJSON, Nullable: true}, + {Name: "f6", Type: qvalue.QValueKindJSON, Nullable: true}, + {Name: "f7", Type: qvalue.QValueKindJSON, Nullable: true}, + {Name: "f8", Type: qvalue.QValueKindInt16, Nullable: true}, + }, + } +} + +func GetOwnersSelectorString() string { + schema := GetOwnersSchema() + var fields []string + for _, field := range schema.Fields { + // append quoted field name + fields = append(fields, fmt.Sprintf(`"%s"`, field.Name)) + } + return strings.Join(fields, ",") +}