From 9931a1502b65f9a008709ded67a6bec21f1adc6d Mon Sep 17 00:00:00 2001 From: Kevin Biju Date: Fri, 17 Nov 2023 00:13:10 +0530 Subject: [PATCH 01/10] test fixing with a side of BigQuery features --- flow/connectors/bigquery/bigquery.go | 9 +- flow/connectors/bigquery/qrep_avro_sync.go | 158 +++++++------- flow/connectors/s3/qrep.go | 3 +- .../snowflake/avro_file_writer_test.go | 66 +++++- flow/connectors/snowflake/qrep_avro_sync.go | 7 +- flow/connectors/utils/avro/avro_writer.go | 63 +++--- flow/e2e/bigquery/peer_flow_bq_test.go | 193 ++++-------------- flow/e2e/bigquery/qrep_flow_bq_test.go | 42 +--- flow/e2e/postgres/qrep_flow_pg_test.go | 2 +- flow/e2e/s3/qrep_flow_s3_test.go | 4 +- flow/e2e/snowflake/peer_flow_sf_test.go | 188 +++-------------- flow/e2e/snowflake/qrep_flow_sf_test.go | 10 +- .../e2e/sqlserver/qrep_flow_sqlserver_test.go | 2 +- flow/e2e/test_utils.go | 15 +- flow/generated/protos/flow.pb.go | 58 +++--- flow/go.sum | 2 + flow/workflows/qrep_flow.go | 25 ++- nexus/pt/src/peerdb_flow.rs | 6 +- nexus/pt/src/peerdb_flow.serde.rs | 18 ++ protos/flow.proto | 5 +- ui/grpc_generated/flow.ts | 22 +- 21 files changed, 360 insertions(+), 538 deletions(-) diff --git a/flow/connectors/bigquery/bigquery.go b/flow/connectors/bigquery/bigquery.go index 776b6cc828..c216ac5233 100644 --- a/flow/connectors/bigquery/bigquery.go +++ b/flow/connectors/bigquery/bigquery.go @@ -1242,20 +1242,13 @@ func (c *BigQueryConnector) grabJobsUpdateLock() (func() error, error) { // grab an advisory lock based on the mirror jobs table hash mjTbl := fmt.Sprintf("%s.%s", c.datasetID, MirrorJobsTable) - _, err = tx.Exec(c.ctx, "SELECT pg_advisory_lock(hashtext($1))", mjTbl) - + _, err = tx.Exec(c.ctx, "SELECT pg_advisory_xact_lock(hashtext($1))", mjTbl) if err != nil { err = tx.Rollback(c.ctx) return nil, fmt.Errorf("failed to grab lock on %s: %w", mjTbl, err) } return func() error { - // release the lock - _, err := tx.Exec(c.ctx, "SELECT pg_advisory_unlock(hashtext($1))", mjTbl) - if err != nil { - return fmt.Errorf("failed to release lock on %s: %w", mjTbl, err) - } - err = tx.Commit(c.ctx) if err != nil { return fmt.Errorf("failed to commit transaction: %w", err) diff --git a/flow/connectors/bigquery/qrep_avro_sync.go b/flow/connectors/bigquery/qrep_avro_sync.go index 8cb8af79ce..7a44352fc0 100644 --- a/flow/connectors/bigquery/qrep_avro_sync.go +++ b/flow/connectors/bigquery/qrep_avro_sync.go @@ -1,19 +1,18 @@ package connbigquery import ( - "bytes" - "context" "encoding/json" "fmt" + "os" "strings" "time" "cloud.google.com/go/bigquery" "github.com/PeerDB-io/peer-flow/connectors/utils" + avro "github.com/PeerDB-io/peer-flow/connectors/utils/avro" "github.com/PeerDB-io/peer-flow/generated/protos" "github.com/PeerDB-io/peer-flow/model" "github.com/PeerDB-io/peer-flow/model/qvalue" - "github.com/linkedin/goavro/v2" log "github.com/sirupsen/logrus" "go.temporal.io/sdk/activity" ) @@ -44,13 +43,13 @@ func (s *QRepAvroSyncMethod) SyncRecords( flowJobName, dstTableName, syncBatchID), ) // You will need to define your Avro schema as a string - avroSchema, nullable, err := DefineAvroSchema(dstTableName, dstTableMetadata) + avroSchema, err := DefineAvroSchema(dstTableName, dstTableMetadata) if err != nil { return 0, fmt.Errorf("failed to define Avro schema: %w", err) } stagingTable := fmt.Sprintf("%s_%s_staging", dstTableName, fmt.Sprint(syncBatchID)) - numRecords, err := s.writeToStage(fmt.Sprint(syncBatchID), dstTableName, avroSchema, stagingTable, stream, nullable) + numRecords, err := 
s.writeToStage(fmt.Sprint(syncBatchID), dstTableName, avroSchema, stagingTable, stream) if err != nil { return -1, fmt.Errorf("failed to push to avro stage: %v", err) } @@ -106,7 +105,7 @@ func (s *QRepAvroSyncMethod) SyncQRepRecords( startTime := time.Now() // You will need to define your Avro schema as a string - avroSchema, nullable, err := DefineAvroSchema(dstTableName, dstTableMetadata) + avroSchema, err := DefineAvroSchema(dstTableName, dstTableMetadata) if err != nil { return 0, fmt.Errorf("failed to define Avro schema: %w", err) } @@ -114,10 +113,12 @@ func (s *QRepAvroSyncMethod) SyncQRepRecords( "flowName": flowJobName, }).Infof("Obtained Avro schema for destination table %s and partition ID %s", dstTableName, partition.PartitionId) - fmt.Printf("Avro schema: %s\n", avroSchema) + log.WithFields(log.Fields{ + "flowName": flowJobName, + }).Infof("Avro schema: %v\n", avroSchema) // create a staging table name with partitionID replace hyphens with underscores stagingTable := fmt.Sprintf("%s_%s_staging", dstTableName, strings.ReplaceAll(partition.PartitionId, "-", "_")) - numRecords, err := s.writeToStage(partition.PartitionId, flowJobName, avroSchema, stagingTable, stream, nullable) + numRecords, err := s.writeToStage(partition.PartitionId, flowJobName, avroSchema, stagingTable, stream) if err != nil { return -1, fmt.Errorf("failed to push to avro stage: %v", err) } @@ -182,14 +183,15 @@ type AvroSchema struct { Fields []AvroField `json:"fields"` } -func DefineAvroSchema(dstTableName string, dstTableMetadata *bigquery.TableMetadata) (string, map[string]bool, error) { +func DefineAvroSchema(dstTableName string, + dstTableMetadata *bigquery.TableMetadata) (*model.QRecordAvroSchemaDefinition, error) { avroFields := []AvroField{} nullableFields := map[string]bool{} for _, bqField := range dstTableMetadata.Schema { avroType, err := GetAvroType(bqField) if err != nil { - return "", nil, err + return nil, err } // If a field is nullable, its Avro type should be ["null", actualType] @@ -212,10 +214,13 @@ func DefineAvroSchema(dstTableName string, dstTableMetadata *bigquery.TableMetad avroSchemaJSON, err := json.Marshal(avroSchema) if err != nil { - return "", nil, fmt.Errorf("failed to marshal Avro schema to JSON: %v", err) + return nil, fmt.Errorf("failed to marshal Avro schema to JSON: %v", err) } - return string(avroSchemaJSON), nullableFields, nil + return &model.QRecordAvroSchemaDefinition{ + Schema: string(avroSchemaJSON), + NullableFields: nullableFields, + }, nil } func GetAvroType(bqField *bigquery.FieldSchema) (interface{}, error) { @@ -306,10 +311,9 @@ func GetAvroType(bqField *bigquery.FieldSchema) (interface{}, error) { func (s *QRepAvroSyncMethod) writeToStage( syncID string, objectFolder string, - avroSchema string, + avroSchema *model.QRecordAvroSchemaDefinition, stagingTable string, stream *model.QRecordStream, - nullable map[string]bool, ) (int, error) { shutdown := utils.HeartbeatRoutine(s.connector.ctx, time.Minute, func() string { @@ -320,95 +324,71 @@ func (s *QRepAvroSyncMethod) writeToStage( defer func() { shutdown <- true }() - ctx := context.Background() - bucket := s.connector.storageClient.Bucket(s.gcsBucket) - gcsObjectName := fmt.Sprintf("%s/%s.avro", objectFolder, syncID) - - obj := bucket.Object(gcsObjectName) - w := obj.NewWriter(ctx) - - // Create OCF Writer - var ocfFileContents bytes.Buffer - ocfWriter, err := goavro.NewOCFWriter(goavro.OCFConfig{ - W: &ocfFileContents, - Schema: avroSchema, - }) - if err != nil { - return 0, fmt.Errorf("failed to create 
OCF writer: %w", err) - } - schema, err := stream.Schema() - if err != nil { - log.WithFields(log.Fields{ - "partitonOrBatchID": syncID, - }).Errorf("failed to get schema from stream: %v", err) - return 0, fmt.Errorf("failed to get schema from stream: %w", err) - } + var avroFilePath string + numRecords, err := func() (int, error) { + ocfWriter := avro.NewPeerDBOCFWriter(s.connector.ctx, stream, avroSchema, + avro.CompressSnappy, qvalue.QDWHTypeBigQuery) + if s.gcsBucket != "" { + bucket := s.connector.storageClient.Bucket(s.gcsBucket) + avroFilePath = fmt.Sprintf("%s/%s.avro.snappy", objectFolder, syncID) + obj := bucket.Object(avroFilePath) + w := obj.NewWriter(s.connector.ctx) + + numRecords, err := ocfWriter.WriteOCF(w) + if err != nil { + return 0, fmt.Errorf("failed to write records to Avro file on GCS: %w", err) + } + return numRecords, err + } else { + tmpDir, err := os.MkdirTemp("", "peerdb-avro") + if err != nil { + return 0, fmt.Errorf("failed to create temp dir: %w", err) + } - activity.RecordHeartbeat(s.connector.ctx, fmt.Sprintf( - "Obtained staging bucket %s and schema of rows. Now writing records to OCF file.", - gcsObjectName), - ) - numRecords := 0 - // Write each QRecord to the OCF file - for qRecordOrErr := range stream.Records { - if numRecords > 0 && numRecords%10000 == 0 { - activity.RecordHeartbeat(s.connector.ctx, fmt.Sprintf( - "Written %d records to OCF file for staging bucket %s.", - numRecords, gcsObjectName), - ) - } - if qRecordOrErr.Err != nil { + avroFilePath = fmt.Sprintf("%s/%s.avro.snappy", tmpDir, syncID) log.WithFields(log.Fields{ "batchOrPartitionID": syncID, - }).Errorf("[bq_avro] failed to get record from stream: %v", qRecordOrErr.Err) - return 0, fmt.Errorf("[bq_avro] failed to get record from stream: %w", qRecordOrErr.Err) - } - - qRecord := qRecordOrErr.Record - avroConverter := model.NewQRecordAvroConverter( - qRecord, - qvalue.QDWHTypeBigQuery, - &nullable, - schema.GetColumnNames(), - ) - avroMap, err := avroConverter.Convert() - if err != nil { - return 0, fmt.Errorf("failed to convert QRecord to Avro compatible map: %w", err) + }).Infof("writing records to local file %s", avroFilePath) + numRecords, err := ocfWriter.WriteRecordsToAvroFile(avroFilePath) + if err != nil { + return 0, fmt.Errorf("failed to write records to local Avro file: %w", err) + } + return numRecords, err } + }() + if err != nil { + return 0, err + } + log.WithFields(log.Fields{ + "batchOrPartitionID": syncID, + }).Infof("wrote %d records to file %s", numRecords, avroFilePath) - err = ocfWriter.Append([]interface{}{avroMap}) + bqClient := s.connector.client + datasetID := s.connector.datasetID + var avroRef bigquery.LoadSource + if s.gcsBucket != "" { + gcsRef := bigquery.NewGCSReference(fmt.Sprintf("gs://%s/%s", s.gcsBucket, avroFilePath)) + gcsRef.SourceFormat = bigquery.Avro + avroRef = gcsRef + } else { + fh, err := os.Open(avroFilePath) if err != nil { - return 0, fmt.Errorf("failed to write record to OCF file: %w", err) + return 0, fmt.Errorf("failed to read local Avro file: %w", err) } - numRecords++ - } - activity.RecordHeartbeat(s.connector.ctx, fmt.Sprintf( - "Writing OCF contents to BigQuery for partition/batch ID %s", - syncID), - ) - // Write OCF contents to GCS - if _, err = w.Write(ocfFileContents.Bytes()); err != nil { - return 0, fmt.Errorf("failed to write OCF file to GCS: %w", err) + localRef := bigquery.NewReaderSource(fh) + localRef.SourceFormat = bigquery.Avro + avroRef = localRef } - if err := w.Close(); err != nil { - return 0, fmt.Errorf("failed 
to close GCS object writer: %w", err) - } - - // write this file to bigquery - gcsRef := bigquery.NewGCSReference(fmt.Sprintf("gs://%s/%s", s.gcsBucket, gcsObjectName)) - gcsRef.SourceFormat = bigquery.Avro - bqClient := s.connector.client - datasetID := s.connector.datasetID - loader := bqClient.Dataset(datasetID).Table(stagingTable).LoaderFrom(gcsRef) + loader := bqClient.Dataset(datasetID).Table(stagingTable).LoaderFrom(avroRef) loader.UseAvroLogicalTypes = true - job, err := loader.Run(ctx) + job, err := loader.Run(s.connector.ctx) if err != nil { return 0, fmt.Errorf("failed to run BigQuery load job: %w", err) } - status, err := job.Wait(ctx) + status, err := job.Wait(s.connector.ctx) if err != nil { return 0, fmt.Errorf("failed to wait for BigQuery load job: %w", err) } @@ -417,6 +397,6 @@ func (s *QRepAvroSyncMethod) writeToStage( return 0, fmt.Errorf("failed to load Avro file into BigQuery table: %w", err) } log.Printf("Pushed into %s/%s", - gcsObjectName, syncID) + avroFilePath, syncID) return numRecords, nil } diff --git a/flow/connectors/s3/qrep.go b/flow/connectors/s3/qrep.go index b34f9a2cf3..1f1cf881da 100644 --- a/flow/connectors/s3/qrep.go +++ b/flow/connectors/s3/qrep.go @@ -7,6 +7,7 @@ import ( avro "github.com/PeerDB-io/peer-flow/connectors/utils/avro" "github.com/PeerDB-io/peer-flow/generated/protos" "github.com/PeerDB-io/peer-flow/model" + "github.com/PeerDB-io/peer-flow/model/qvalue" log "github.com/sirupsen/logrus" ) @@ -62,7 +63,7 @@ func (c *S3Connector) writeToAvroFile( } s3AvroFileKey := fmt.Sprintf("%s/%s/%s.avro", s3o.Prefix, jobName, partitionID) - writer := avro.NewPeerDBOCFWriter(c.ctx, stream, avroSchema) + writer := avro.NewPeerDBOCFWriter(c.ctx, stream, avroSchema, avro.CompressNone, qvalue.QDWHTypeSnowflake) numRecords, err := writer.WriteRecordsToS3(s3o.Bucket, s3AvroFileKey, c.creds) if err != nil { return 0, fmt.Errorf("failed to write records to S3: %w", err) diff --git a/flow/connectors/snowflake/avro_file_writer_test.go b/flow/connectors/snowflake/avro_file_writer_test.go index 77310c45db..76b70f478f 100644 --- a/flow/connectors/snowflake/avro_file_writer_test.go +++ b/flow/connectors/snowflake/avro_file_writer_test.go @@ -1,6 +1,7 @@ package connsnowflake import ( + "context" "fmt" "math/big" "os" @@ -142,7 +143,64 @@ func TestWriteRecordsToAvroFileHappyPath(t *testing.T) { fmt.Printf("[test] avroSchema: %v\n", avroSchema) // Call function - writer := avro.NewPeerDBOCFWriter(nil, records, avroSchema) + writer := avro.NewPeerDBOCFWriter(context.Background(), + records, avroSchema, avro.CompressNone, qvalue.QDWHTypeSnowflake) + _, err = writer.WriteRecordsToAvroFile(tmpfile.Name()) + require.NoError(t, err, "expected WriteRecordsToAvroFile to complete without errors") + + // Check file is not empty + info, err := tmpfile.Stat() + require.NoError(t, err) + require.NotZero(t, info.Size(), "expected file to not be empty") +} + +func TestWriteRecordsToZstdAvroFileHappyPath(t *testing.T) { + // Create temporary file + tmpfile, err := os.CreateTemp("", "example_*.avro.zst") + require.NoError(t, err) + + defer os.Remove(tmpfile.Name()) // clean up + defer tmpfile.Close() // close file after test ends + + // Define sample data + records, schema := generateRecords(t, true, 10, false) + + avroSchema, err := model.GetAvroSchemaDefinition("not_applicable", schema) + require.NoError(t, err) + + fmt.Printf("[test] avroSchema: %v\n", avroSchema) + + // Call function + writer := avro.NewPeerDBOCFWriter(context.Background(), + records, avroSchema, avro.CompressZstd, 
qvalue.QDWHTypeSnowflake) + _, err = writer.WriteRecordsToAvroFile(tmpfile.Name()) + require.NoError(t, err, "expected WriteRecordsToAvroFile to complete without errors") + + // Check file is not empty + info, err := tmpfile.Stat() + require.NoError(t, err) + require.NotZero(t, info.Size(), "expected file to not be empty") +} + +func TestWriteRecordsToDeflateAvroFileHappyPath(t *testing.T) { + // Create temporary file + tmpfile, err := os.CreateTemp("", "example_*.avro.zz") + require.NoError(t, err) + + defer os.Remove(tmpfile.Name()) // clean up + defer tmpfile.Close() // close file after test ends + + // Define sample data + records, schema := generateRecords(t, true, 10, false) + + avroSchema, err := model.GetAvroSchemaDefinition("not_applicable", schema) + require.NoError(t, err) + + fmt.Printf("[test] avroSchema: %v\n", avroSchema) + + // Call function + writer := avro.NewPeerDBOCFWriter(context.Background(), + records, avroSchema, avro.CompressDeflate, qvalue.QDWHTypeSnowflake) _, err = writer.WriteRecordsToAvroFile(tmpfile.Name()) require.NoError(t, err, "expected WriteRecordsToAvroFile to complete without errors") @@ -168,7 +226,8 @@ func TestWriteRecordsToAvroFileNonNull(t *testing.T) { fmt.Printf("[test] avroSchema: %v\n", avroSchema) // Call function - writer := avro.NewPeerDBOCFWriter(nil, records, avroSchema) + writer := avro.NewPeerDBOCFWriter(context.Background(), + records, avroSchema, avro.CompressNone, qvalue.QDWHTypeSnowflake) _, err = writer.WriteRecordsToAvroFile(tmpfile.Name()) require.NoError(t, err, "expected WriteRecordsToAvroFile to complete without errors") @@ -195,7 +254,8 @@ func TestWriteRecordsToAvroFileAllNulls(t *testing.T) { fmt.Printf("[test] avroSchema: %v\n", avroSchema) // Call function - writer := avro.NewPeerDBOCFWriter(nil, records, avroSchema) + writer := avro.NewPeerDBOCFWriter(context.Background(), + records, avroSchema, avro.CompressNone, qvalue.QDWHTypeSnowflake) _, err = writer.WriteRecordsToAvroFile(tmpfile.Name()) require.NoError(t, err, "expected WriteRecordsToAvroFile to complete without errors") diff --git a/flow/connectors/snowflake/qrep_avro_sync.go b/flow/connectors/snowflake/qrep_avro_sync.go index b41e97a192..fcadb3298b 100644 --- a/flow/connectors/snowflake/qrep_avro_sync.go +++ b/flow/connectors/snowflake/qrep_avro_sync.go @@ -274,7 +274,7 @@ func (s *SnowflakeAvroSyncMethod) writeToAvroFile( ) (int, string, error) { var numRecords int if s.config.StagingPath == "" { - ocfWriter := avro.NewPeerDBOCFWriterWithCompression(s.connector.ctx, stream, avroSchema) + ocfWriter := avro.NewPeerDBOCFWriter(s.connector.ctx, stream, avroSchema, avro.CompressZstd, qvalue.QDWHTypeSnowflake) tmpDir, err := os.MkdirTemp("", "peerdb-avro") if err != nil { return 0, "", fmt.Errorf("failed to create temp dir: %w", err) @@ -292,13 +292,14 @@ func (s *SnowflakeAvroSyncMethod) writeToAvroFile( return numRecords, localFilePath, nil } else if strings.HasPrefix(s.config.StagingPath, "s3://") { - ocfWriter := avro.NewPeerDBOCFWriter(s.connector.ctx, stream, avroSchema) + ocfWriter := avro.NewPeerDBOCFWriter(s.connector.ctx, stream, avroSchema, avro.CompressZstd, + qvalue.QDWHTypeSnowflake) s3o, err := utils.NewS3BucketAndPrefix(s.config.StagingPath) if err != nil { return 0, "", fmt.Errorf("failed to parse staging path: %w", err) } - s3AvroFileKey := fmt.Sprintf("%s/%s/%s.avro", s3o.Prefix, s.config.FlowJobName, partitionID) + s3AvroFileKey := fmt.Sprintf("%s/%s/%s.avro.zst", s3o.Prefix, s.config.FlowJobName, partitionID) log.WithFields(log.Fields{ 
"flowName": flowJobName, "partitionID": partitionID, diff --git a/flow/connectors/utils/avro/avro_writer.go b/flow/connectors/utils/avro/avro_writer.go index 0b4cf09d7e..36c8858aa4 100644 --- a/flow/connectors/utils/avro/avro_writer.go +++ b/flow/connectors/utils/avro/avro_writer.go @@ -13,56 +13,67 @@ import ( "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/s3/s3manager" + "github.com/klauspost/compress/flate" + "github.com/klauspost/compress/snappy" "github.com/klauspost/compress/zstd" "github.com/linkedin/goavro/v2" log "github.com/sirupsen/logrus" uber_atomic "go.uber.org/atomic" ) +type AvroCompressionCodec int64 + +const ( + CompressNone AvroCompressionCodec = iota + CompressZstd + CompressDeflate + CompressSnappy +) + type PeerDBOCFWriter struct { - ctx context.Context - stream *model.QRecordStream - avroSchema *model.QRecordAvroSchemaDefinition - compress bool - writer io.WriteCloser + ctx context.Context + stream *model.QRecordStream + avroSchema *model.QRecordAvroSchemaDefinition + avroCompressionCodec AvroCompressionCodec + writer io.WriteCloser + targetDWH qvalue.QDWHType } func NewPeerDBOCFWriter( ctx context.Context, stream *model.QRecordStream, avroSchema *model.QRecordAvroSchemaDefinition, + avroCompressionCodec AvroCompressionCodec, + targetDWH qvalue.QDWHType, ) *PeerDBOCFWriter { return &PeerDBOCFWriter{ - ctx: ctx, - stream: stream, - avroSchema: avroSchema, - compress: false, - } -} - -func NewPeerDBOCFWriterWithCompression( - ctx context.Context, - stream *model.QRecordStream, - avroSchema *model.QRecordAvroSchemaDefinition, -) *PeerDBOCFWriter { - return &PeerDBOCFWriter{ - ctx: ctx, - stream: stream, - avroSchema: avroSchema, - compress: true, + ctx: ctx, + stream: stream, + avroSchema: avroSchema, + avroCompressionCodec: avroCompressionCodec, + targetDWH: targetDWH, } } func (p *PeerDBOCFWriter) initWriteCloser(w io.Writer) error { var err error - if p.compress { + switch p.avroCompressionCodec { + case CompressNone: + p.writer = &nopWriteCloser{w} + case CompressZstd: p.writer, err = zstd.NewWriter(w) if err != nil { return fmt.Errorf("error while initializing zstd encoding writer: %w", err) } - } else { - p.writer = &nopWriteCloser{w} + case CompressDeflate: + p.writer, err = flate.NewWriter(w, -1) + if err != nil { + return fmt.Errorf("error while initializing deflate encoding writer: %w", err) + } + case CompressSnappy: + p.writer = snappy.NewBufferedWriter(w) } + return nil } @@ -115,7 +126,7 @@ func (p *PeerDBOCFWriter) writeRecordsToOCFWriter(ocfWriter *goavro.OCFWriter) ( qRecord := qRecordOrErr.Record avroConverter := model.NewQRecordAvroConverter( qRecord, - qvalue.QDWHTypeSnowflake, + p.targetDWH, &p.avroSchema.NullableFields, colNames, ) diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go index be7f45ef4a..8d519b4f99 100644 --- a/flow/e2e/bigquery/peer_flow_bq_test.go +++ b/flow/e2e/bigquery/peer_flow_bq_test.go @@ -46,8 +46,6 @@ func TestPeerFlowE2ETestSuiteBQ(t *testing.T) { {"Test_Toast_Advance_2_BQ", s.Test_Toast_Advance_2_BQ}, {"Test_Toast_Advance_3_BQ", s.Test_Toast_Advance_3_BQ}, {"Test_Types_BQ", s.Test_Types_BQ}, - {"Test_Types_Avro_BQ", s.Test_Types_Avro_BQ}, - {"Test_Simple_Flow_BQ_Avro_CDC", s.Test_Simple_Flow_BQ_Avro_CDC}, {"Test_Multi_Table_BQ", s.Test_Multi_Table_BQ}, {"Test_Simple_Schema_Changes_BQ", s.Test_Simple_Schema_Changes_BQ}, {"Test_Composite_PKey_BQ", s.Test_Composite_PKey_BQ}, @@ -65,6 +63,10 @@ func TestPeerFlowE2ETestSuiteBQ(t *testing.T) { t.Run(tt.name, tt.test) } 
+ + t.Cleanup(func() { + s.TearDownSuite() + }) } func (s *PeerFlowE2ETestSuiteBQ) attachSchemaSuffix(tableName string) string { @@ -185,6 +187,8 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Complete_Flow_No_Data(t *testing.T) { TableNameMapping: map[string]string{srcTableName: dstTableName}, PostgresPort: e2e.PostgresPort, Destination: s.bqHelper.Peer, + CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, + CdcStagingPath: "peerdb_staging", } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() @@ -230,6 +234,8 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Char_ColType_Error(t *testing.T) { TableNameMapping: map[string]string{srcTableName: dstTableName}, PostgresPort: e2e.PostgresPort, Destination: s.bqHelper.Peer, + CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, + CdcStagingPath: "peerdb_staging", } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() @@ -278,6 +284,8 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Complete_Simple_Flow_BQ(t *testing.T) { TableNameMapping: map[string]string{srcTableName: dstTableName}, PostgresPort: e2e.PostgresPort, Destination: s.bqHelper.Peer, + CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, + CdcStagingPath: "peerdb_staging", } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() @@ -347,13 +355,15 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_BQ(t *testing.T) { TableNameMapping: map[string]string{srcTableName: dstTableName}, PostgresPort: e2e.PostgresPort, Destination: s.bqHelper.Peer, + CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, + CdcStagingPath: "peerdb_staging", } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() require.NoError(t, err) limits := peerflow.CDCFlowLimits{ - TotalSyncFlows: 1, + TotalSyncFlows: 2, MaxBatchSize: 100, } @@ -416,13 +426,15 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_Nochanges_BQ(t *testing.T) { TableNameMapping: map[string]string{srcTableName: dstTableName}, PostgresPort: e2e.PostgresPort, Destination: s.bqHelper.Peer, + CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, + CdcStagingPath: "peerdb_staging", } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() require.NoError(t, err) limits := peerflow.CDCFlowLimits{ - TotalSyncFlows: 1, + TotalSyncFlows: 2, MaxBatchSize: 100, } @@ -478,6 +490,8 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_1_BQ(t *testing.T) { TableNameMapping: map[string]string{srcTableName: dstTableName}, PostgresPort: e2e.PostgresPort, Destination: s.bqHelper.Peer, + CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, + CdcStagingPath: "peerdb_staging", } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() @@ -552,13 +566,15 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_2_BQ(t *testing.T) { TableNameMapping: map[string]string{srcTableName: dstTableName}, PostgresPort: e2e.PostgresPort, Destination: s.bqHelper.Peer, + CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, + CdcStagingPath: "peerdb_staging", } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() require.NoError(t, err) limits := peerflow.CDCFlowLimits{ - TotalSyncFlows: 1, + TotalSyncFlows: 2, MaxBatchSize: 100, } @@ -621,13 +637,15 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_3_BQ(t *testing.T) { TableNameMapping: map[string]string{srcTableName: dstTableName}, PostgresPort: e2e.PostgresPort, Destination: s.bqHelper.Peer, + CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, + CdcStagingPath: "peerdb_staging", 
} flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() require.NoError(t, err) limits := peerflow.CDCFlowLimits{ - TotalSyncFlows: 1, + TotalSyncFlows: 2, MaxBatchSize: 100, } @@ -689,85 +707,6 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Types_BQ(t *testing.T) { TableNameMapping: map[string]string{srcTableName: dstTableName}, PostgresPort: e2e.PostgresPort, Destination: s.bqHelper.Peer, - } - - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(t, err) - - limits := peerflow.CDCFlowLimits{ - - TotalSyncFlows: 1, - MaxBatchSize: 100, - } - - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup - // and execute a transaction touching toast columns - go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) - /* test inserting various types*/ - _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` - INSERT INTO %s SELECT 2,2,b'1',b'101', - true,random_bytea(32),'s','test','1.1.10.2'::cidr, - CURRENT_DATE,1.23,1.234,'192.168.1.5'::inet,1, - '5 years 2 months 29 days 1 minute 2 seconds 200 milliseconds 20000 microseconds'::interval, - '{"sai":1}'::json,'{"sai":1}'::jsonb,'08:00:2b:01:02:03'::macaddr, - 1.2,1.23,4::oid,1.23,1,1,1,'test',now(),now(),now()::time,now()::timetz, - 'fat & rat'::tsquery,'a fat cat sat on a mat and ate a fat rat'::tsvector, - txid_current_snapshot(), - '66073c38-b8df-4bdb-bbca-1c97596b8940'::uuid,xmlcomment('hello'), - ARRAY[10299301,2579827], - ARRAY[0.0003, 8902.0092], - ARRAY['hello','bye']; - `, srcTableName)) - require.NoError(t, err) - }() - - env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - - // Verify workflow completes without error - s.True(env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - // allow only continue as new error - s.Error(err) - s.Contains(err.Error(), "continue as new") - - noNulls, err := s.bqHelper.CheckNull(dstTableName, []string{"c41", "c1", "c2", "c3", "c4", - "c6", "c39", "c40", "id", "c9", "c11", "c12", "c13", "c14", "c15", "c16", "c17", "c18", - "c21", "c22", "c23", "c24", "c28", "c29", "c30", "c31", "c33", "c34", "c35", "c36", - "c37", "c38", "c7", "c8", "c32", "c42", "c43", "c44"}) - if err != nil { - fmt.Println("error %w", err) - } - // Make sure that there are no nulls - s.True(noNulls) - - env.AssertExpectations(s.T()) -} - -func (s *PeerFlowE2ETestSuiteBQ) Test_Types_Avro_BQ(t *testing.T) { - t.Parallel() - env := s.NewTestWorkflowEnvironment() - e2e.RegisterWorkflowsAndActivities(env) - - srcTableName := s.attachSchemaSuffix("test_types_avro_bq") - dstTableName := "test_types_avro_bq" - - _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` - CREATE TABLE IF NOT EXISTS %s (id serial PRIMARY KEY,c1 BIGINT,c2 BIT,c3 VARBIT,c4 BOOLEAN, - c6 BYTEA,c7 CHARACTER,c8 varchar,c9 CIDR,c11 DATE,c12 FLOAT,c13 DOUBLE PRECISION, - c14 INET,c15 INTEGER,c16 INTERVAL,c17 JSON,c18 JSONB,c21 MACADDR,c22 MONEY, - c23 NUMERIC,c24 OID,c28 REAL,c29 SMALLINT,c30 SMALLSERIAL,c31 SERIAL,c32 TEXT, - c33 TIMESTAMP,c34 TIMESTAMPTZ,c35 TIME, c36 TIMETZ,c37 TSQUERY,c38 TSVECTOR, - c39 TXID_SNAPSHOT,c40 UUID,c41 XML, c42 INT[], c43 FLOAT[], c44 TEXT[]); - `, srcTableName)) - require.NoError(t, err) - - connectionGen := e2e.FlowConnectionGenerationConfig{ - FlowJobName: s.attachSuffix("test_types_avro_bq"), - TableNameMapping: map[string]string{srcTableName: dstTableName}, - PostgresPort: e2e.PostgresPort, - Destination: s.bqHelper.Peer, CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, CdcStagingPath: "peerdb_staging", 
} @@ -777,7 +716,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Types_Avro_BQ(t *testing.T) { limits := peerflow.CDCFlowLimits{ - TotalSyncFlows: 1, + TotalSyncFlows: 2, MaxBatchSize: 100, } @@ -796,8 +735,8 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Types_Avro_BQ(t *testing.T) { 'fat & rat'::tsquery,'a fat cat sat on a mat and ate a fat rat'::tsvector, txid_current_snapshot(), '66073c38-b8df-4bdb-bbca-1c97596b8940'::uuid,xmlcomment('hello'), - ARRAY[9301,239827], - ARRAY[0.0003, 1039.0034], + ARRAY[10299301,2579827], + ARRAY[0.0003, 8902.0092], ARRAY['hello','bye']; `, srcTableName)) require.NoError(t, err) @@ -826,72 +765,6 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Types_Avro_BQ(t *testing.T) { env.AssertExpectations(s.T()) } -func (s *PeerFlowE2ETestSuiteBQ) Test_Simple_Flow_BQ_Avro_CDC(t *testing.T) { - t.Parallel() - env := s.NewTestWorkflowEnvironment() - e2e.RegisterWorkflowsAndActivities(env) - - srcTableName := s.attachSchemaSuffix("test_simple_flow_bq_avro_cdc") - dstTableName := "test_simple_flow_bq_avro_cdc" - - _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` - CREATE TABLE IF NOT EXISTS %s ( - id SERIAL PRIMARY KEY, - key TEXT NOT NULL, - value TEXT NOT NULL - ); - `, srcTableName)) - require.NoError(t, err) - connectionGen := e2e.FlowConnectionGenerationConfig{ - FlowJobName: s.attachSuffix("test_simple_flow_bq_avro_cdc"), - TableNameMapping: map[string]string{srcTableName: dstTableName}, - PostgresPort: e2e.PostgresPort, - Destination: s.bqHelper.Peer, - CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, - CdcStagingPath: "peerdb_staging", - } - - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(t, err) - - limits := peerflow.CDCFlowLimits{ - TotalSyncFlows: 2, - MaxBatchSize: 100, - } - - go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) - for i := 0; i < 10; i++ { - testKey := fmt.Sprintf("test_key_%d", i) - testValue := fmt.Sprintf("test_value_%d", i) - _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` - INSERT INTO %s (key, value) VALUES ($1, $2) - `, srcTableName), testKey, testValue) - require.NoError(t, err) - } - fmt.Println("Inserted 10 rows into the source table") - }() - - env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - - // Verify workflow completes without error - s.True(env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - // allow only continue as new error - s.Error(err) - s.Contains(err.Error(), "continue as new") - - count, err := s.bqHelper.countRows(dstTableName) - require.NoError(t, err) - s.Equal(10, count) - - // TODO: verify that the data is correctly synced to the destination table - // on the bigquery side - - env.AssertExpectations(s.T()) -} - func (s *PeerFlowE2ETestSuiteBQ) Test_Multi_Table_BQ(t *testing.T) { t.Parallel() env := s.NewTestWorkflowEnvironment() @@ -913,13 +786,15 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Multi_Table_BQ(t *testing.T) { TableNameMapping: map[string]string{srcTable1Name: dstTable1Name, srcTable2Name: dstTable2Name}, PostgresPort: e2e.PostgresPort, Destination: s.bqHelper.Peer, + CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, + CdcStagingPath: "peerdb_staging", } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() require.NoError(t, err) limits := peerflow.CDCFlowLimits{ - TotalSyncFlows: 1, + TotalSyncFlows: 2, MaxBatchSize: 100, } @@ -975,6 +850,8 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Simple_Schema_Changes_BQ(t *testing.T) { TableNameMapping: 
map[string]string{srcTableName: dstTableName}, PostgresPort: e2e.PostgresPort, Destination: s.bqHelper.Peer, + CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, + CdcStagingPath: "peerdb_staging", } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() @@ -1079,6 +956,8 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_BQ(t *testing.T) { TableNameMapping: map[string]string{srcTableName: dstTableName}, PostgresPort: e2e.PostgresPort, Destination: s.bqHelper.Peer, + CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, + CdcStagingPath: "peerdb_staging", } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() @@ -1154,6 +1033,8 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_Toast_1_BQ(t *testing.T) { TableNameMapping: map[string]string{srcTableName: dstTableName}, PostgresPort: e2e.PostgresPort, Destination: s.bqHelper.Peer, + CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, + CdcStagingPath: "peerdb_staging", } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() @@ -1232,6 +1113,8 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_Toast_2_BQ(t *testing.T) { TableNameMapping: map[string]string{srcTableName: dstTableName}, PostgresPort: e2e.PostgresPort, Destination: s.bqHelper.Peer, + CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, + CdcStagingPath: "peerdb_staging", } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() diff --git a/flow/e2e/bigquery/qrep_flow_bq_test.go b/flow/e2e/bigquery/qrep_flow_bq_test.go index 8bd4b6135f..9183762cde 100644 --- a/flow/e2e/bigquery/qrep_flow_bq_test.go +++ b/flow/e2e/bigquery/qrep_flow_bq_test.go @@ -68,7 +68,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Complete_QRep_Flow_Avro() { s.bqHelper.Peer, "peerdb_staging") s.NoError(err) - e2e.RunQrepFlowWorkflow(env, qrepConfig) + e2e.RunQrepFlowWorkflow(s.WorkflowTestSuite, qrepConfig) // Verify workflow completes without error s.True(env.IsWorkflowCompleted()) @@ -81,43 +81,3 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Complete_QRep_Flow_Avro() { env.AssertExpectations(s.T()) } - -// NOTE: Disabled due to large JSON tests being added: https://github.com/PeerDB-io/peerdb/issues/309 - -// Test_Complete_QRep_Flow tests a complete flow with data in the source table. -// The test inserts 10 rows into the source table and verifies that the data is -// // correctly synced to the destination table this runs a QRep Flow. 
-// func (s *E2EPeerFlowTestSuite) Test_Complete_QRep_Flow_Multi_Insert() { -// env := s.NewTestWorkflowEnvironment() -// registerWorkflowsAndActivities(env) - -// numRows := 10 - -// tblName := "test_qrep_flow_multi_insert" -// s.setupSourceTable(tblName, numRows) -// s.setupBQDestinationTable(tblName) - -// query := fmt.Sprintf("SELECT * FROM e2e_test.%s WHERE updated_at BETWEEN {{.start}} AND {{.end}}", tblName) - -// qrepConfig := s.createQRepWorkflowConfig("test_qrep_flow_mi", -// "e2e_test."+tblName, -// tblName, -// query, -// protos.QRepSyncMode_QREP_SYNC_MODE_MULTI_INSERT, -// s.bqHelper.Peer) -// runQrepFlowWorkflow(env, qrepConfig) - -// // Verify workflow completes without error -// s.True(env.IsWorkflowCompleted()) - -// // assert that error contains "invalid connection configs" -// err := env.GetWorkflowError() -// s.NoError(err) - -// count, err := s.bqHelper.CountRows(tblName) -// s.NoError(err) - -// s.Equal(numRows, count) - -// env.AssertExpectations(s.T()) -// } diff --git a/flow/e2e/postgres/qrep_flow_pg_test.go b/flow/e2e/postgres/qrep_flow_pg_test.go index df1653b992..52386711cf 100644 --- a/flow/e2e/postgres/qrep_flow_pg_test.go +++ b/flow/e2e/postgres/qrep_flow_pg_test.go @@ -171,7 +171,7 @@ func (s *PeerFlowE2ETestSuitePG) Test_Complete_QRep_Flow_Multi_Insert_PG() { ) s.NoError(err) - e2e.RunQrepFlowWorkflow(env, qrepConfig) + e2e.RunQrepFlowWorkflow(s.WorkflowTestSuite, qrepConfig) // Verify workflow completes without error s.True(env.IsWorkflowCompleted()) diff --git a/flow/e2e/s3/qrep_flow_s3_test.go b/flow/e2e/s3/qrep_flow_s3_test.go index 2fca18a700..c3845f5f16 100644 --- a/flow/e2e/s3/qrep_flow_s3_test.go +++ b/flow/e2e/s3/qrep_flow_s3_test.go @@ -114,7 +114,7 @@ func (s *PeerFlowE2ETestSuiteS3) Test_Complete_QRep_Flow_S3() { s.NoError(err) qrepConfig.StagingPath = s.s3Helper.s3Config.Url - e2e.RunQrepFlowWorkflow(env, qrepConfig) + e2e.RunQrepFlowWorkflow(s.WorkflowTestSuite, qrepConfig) // Verify workflow completes without error s.True(env.IsWorkflowCompleted()) @@ -164,7 +164,7 @@ func (s *PeerFlowE2ETestSuiteS3) Test_Complete_QRep_Flow_S3_CTID() { qrepConfig.InitialCopyOnly = true qrepConfig.WatermarkColumn = "ctid" - e2e.RunQrepFlowWorkflow(env, qrepConfig) + e2e.RunQrepFlowWorkflow(s.WorkflowTestSuite, qrepConfig) // Verify workflow completes without error s.True(env.IsWorkflowCompleted()) diff --git a/flow/e2e/snowflake/peer_flow_sf_test.go b/flow/e2e/snowflake/peer_flow_sf_test.go index 95bb972a71..907da8ed89 100644 --- a/flow/e2e/snowflake/peer_flow_sf_test.go +++ b/flow/e2e/snowflake/peer_flow_sf_test.go @@ -41,7 +41,6 @@ func TestPeerFlowE2ETestSuiteSF(t *testing.T) { test func(t *testing.T) }{ {"Test_Complete_Simple_Flow_SF", s.Test_Complete_Simple_Flow_SF}, - {"Test_Complete_Simple_Flow_SF_Avro_CDC", s.Test_Complete_Simple_Flow_SF_Avro_CDC}, {"Test_Invalid_Geo_SF_Avro_CDC", s.Test_Invalid_Geo_SF_Avro_CDC}, {"Test_Toast_SF", s.Test_Toast_SF}, {"Test_Toast_Nochanges_SF", s.Test_Toast_Nochanges_SF}, @@ -49,7 +48,6 @@ func TestPeerFlowE2ETestSuiteSF(t *testing.T) { {"Test_Toast_Advance_2_SF", s.Test_Toast_Advance_2_SF}, {"Test_Toast_Advance_3_SF", s.Test_Toast_Advance_3_SF}, {"Test_Types_SF", s.Test_Types_SF}, - {"Test_Types_SF_Avro_CDC", s.Test_Types_SF_Avro_CDC}, {"Test_Multi_Table_SF", s.Test_Multi_Table_SF}, {"Test_Simple_Schema_Changes_SF", s.Test_Simple_Schema_Changes_SF}, {"Test_Composite_PKey_SF", s.Test_Composite_PKey_SF}, @@ -171,6 +169,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Complete_Simple_Flow_SF(t *testing.T) { TableNameMapping: 
map[string]string{srcTableName: dstTableName}, PostgresPort: e2e.PostgresPort, Destination: s.sfHelper.Peer, + CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() @@ -182,11 +181,11 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Complete_Simple_Flow_SF(t *testing.T) { } // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup - // and then insert 10 rows into the source table + // and then insert 15 rows into the source table go func() { e2e.SetupCDCFlowStatusQuery(env, connectionGen) - // insert 10 rows into the source table - for i := 0; i < 10; i++ { + // insert 15 rows into the source table + for i := 0; i < 20; i++ { testKey := fmt.Sprintf("test_key_%d", i) testValue := fmt.Sprintf("test_value_%d", i) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` @@ -209,7 +208,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Complete_Simple_Flow_SF(t *testing.T) { count, err := s.sfHelper.CountRows("test_simple_flow_sf") require.NoError(t, err) - s.Equal(10, count) + s.Equal(20, count) // check the number of rows where _PEERDB_SYNCED_AT is newer than 5 mins ago // it should match the count. @@ -218,80 +217,10 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Complete_Simple_Flow_SF(t *testing.T) { `, dstTableName) numNewRows, err := s.sfHelper.RunIntQuery(newerSyncedAtQuery) require.NoError(t, err) - s.Equal(10, numNewRows) + s.Equal(20, numNewRows) // TODO: verify that the data is correctly synced to the destination table - // on the bigquery side - - env.AssertExpectations(s.T()) -} - -func (s *PeerFlowE2ETestSuiteSF) Test_Complete_Simple_Flow_SF_Avro_CDC(t *testing.T) { - t.Parallel() - env := s.NewTestWorkflowEnvironment() - e2e.RegisterWorkflowsAndActivities(env) - - tblConst := "test_simple_flow_sf_avro_cdc" - srcTableName := s.attachSchemaSuffix(tblConst) - dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, tblConst) - - _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` - CREATE TABLE IF NOT EXISTS %s ( - id SERIAL PRIMARY KEY, - key TEXT NOT NULL, - value TEXT NOT NULL - ); - `, srcTableName)) - require.NoError(t, err) - - connectionGen := e2e.FlowConnectionGenerationConfig{ - FlowJobName: s.attachSuffix("test_simple_flow_avro"), - TableNameMapping: map[string]string{srcTableName: dstTableName}, - PostgresPort: e2e.PostgresPort, - Destination: s.sfHelper.Peer, - CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, - } - - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(t, err) - - limits := peerflow.CDCFlowLimits{ - TotalSyncFlows: 2, - MaxBatchSize: 100, - } - - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup - // and then insert 10 rows into the source table - go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) - // insert 10 rows into the source table - for i := 0; i < 15; i++ { - testKey := fmt.Sprintf("test_key_%d", i) - testValue := fmt.Sprintf("test_value_%d", i) - _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` - INSERT INTO %s (key, value) VALUES ($1, $2) - `, srcTableName), testKey, testValue) - require.NoError(t, err) - } - fmt.Println("Inserted 15 rows into the source table") - }() - - env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - - // Verify workflow completes without error - require.True(t, env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - // allow only continue as new error - s.Error(err) - s.Contains(err.Error(), "continue as 
new") - - count, err := s.sfHelper.CountRows(tblConst) - require.NoError(t, err) - s.Equal(15, count) - - // TODO: verify that the data is correctly synced to the destination table - // on the bigquery side + // on the Snowflake side env.AssertExpectations(s.T()) } @@ -407,13 +336,14 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Toast_SF(t *testing.T) { TableNameMapping: map[string]string{srcTableName: dstTableName}, PostgresPort: e2e.PostgresPort, Destination: s.sfHelper.Peer, + CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() require.NoError(t, err) limits := peerflow.CDCFlowLimits{ - TotalSyncFlows: 1, + TotalSyncFlows: 2, MaxBatchSize: 100, } @@ -478,13 +408,14 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Toast_Nochanges_SF(t *testing.T) { TableNameMapping: map[string]string{srcTableName: dstTableName}, PostgresPort: e2e.PostgresPort, Destination: s.sfHelper.Peer, + CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() require.NoError(t, err) limits := peerflow.CDCFlowLimits{ - TotalSyncFlows: 1, + TotalSyncFlows: 2, MaxBatchSize: 100, } @@ -541,6 +472,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Toast_Advance_1_SF(t *testing.T) { TableNameMapping: map[string]string{srcTableName: dstTableName}, PostgresPort: e2e.PostgresPort, Destination: s.sfHelper.Peer, + CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() @@ -616,13 +548,14 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Toast_Advance_2_SF(t *testing.T) { TableNameMapping: map[string]string{srcTableName: dstTableName}, PostgresPort: e2e.PostgresPort, Destination: s.sfHelper.Peer, + CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() require.NoError(t, err) limits := peerflow.CDCFlowLimits{ - TotalSyncFlows: 1, + TotalSyncFlows: 2, MaxBatchSize: 100, } @@ -686,13 +619,14 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Toast_Advance_3_SF(t *testing.T) { TableNameMapping: map[string]string{srcTableName: dstTableName}, PostgresPort: e2e.PostgresPort, Destination: s.sfHelper.Peer, + CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() require.NoError(t, err) limits := peerflow.CDCFlowLimits{ - TotalSyncFlows: 1, + TotalSyncFlows: 2, MaxBatchSize: 100, } @@ -751,85 +685,6 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Types_SF(t *testing.T) { `, srcTableName, srcTableName)) require.NoError(t, err) - connectionGen := e2e.FlowConnectionGenerationConfig{ - FlowJobName: s.attachSuffix("test_types_sf"), - TableNameMapping: map[string]string{srcTableName: dstTableName}, - PostgresPort: e2e.PostgresPort, - Destination: s.sfHelper.Peer, - } - - flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(t, err) - - limits := peerflow.CDCFlowLimits{ - TotalSyncFlows: 1, - MaxBatchSize: 100, - } - - // in a separate goroutine, wait for PeerFlowStatusQuery to finish setup - // and execute a transaction touching toast columns - go func() { - e2e.SetupCDCFlowStatusQuery(env, connectionGen) - /* test inserting various types*/ - _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` - INSERT INTO %s SELECT 2,2,b'1',b'101', - true,random_bytea(32),'s','test','1.1.10.2'::cidr, - CURRENT_DATE,1.23,1.234,'192.168.1.5'::inet,1, - '5 years 2 months 29 days 1 
minute 2 seconds 200 milliseconds 20000 microseconds'::interval, - '{"sai":1}'::json,'{"sai":1}'::jsonb,'08:00:2b:01:02:03'::macaddr, - 1.2,1.23,4::oid,1.23,1,1,1,'test',now(),now(),now()::time,now()::timetz, - 'fat & rat'::tsquery,'a fat cat sat on a mat and ate a fat rat'::tsvector, - txid_current_snapshot(), - '66073c38-b8df-4bdb-bbca-1c97596b8940'::uuid,xmlcomment('hello'), - 'POINT(1 2)','POINT(40.7128 -74.0060)','POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))', - 'LINESTRING(-74.0060 40.7128, -73.9352 40.7306, -73.9123 40.7831)','LINESTRING(0 0, 1 1, 2 2)', - 'POLYGON((-74.0060 40.7128, -73.9352 40.7306, -73.9123 40.7831, -74.0060 40.7128))'; - `, srcTableName)) - require.NoError(t, err) - }() - - env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) - - // Verify workflow completes without error - s.True(env.IsWorkflowCompleted()) - err = env.GetWorkflowError() - - // allow only continue as new error - s.Error(err) - s.Contains(err.Error(), "continue as new") - - noNulls, err := s.sfHelper.CheckNull("test_types_sf", []string{"c41", "c1", "c2", "c3", "c4", - "c6", "c39", "c40", "id", "c9", "c11", "c12", "c13", "c14", "c15", "c16", "c17", "c18", - "c21", "c22", "c23", "c24", "c28", "c29", "c30", "c31", "c33", "c34", "c35", "c36", - "c37", "c38", "c7", "c8", "c32", "c42", "c43", "c44", "c45", "c46"}) - if err != nil { - fmt.Println("error %w", err) - } - // Make sure that there are no nulls - s.Equal(noNulls, true) - - env.AssertExpectations(s.T()) -} - -func (s *PeerFlowE2ETestSuiteSF) Test_Types_SF_Avro_CDC(t *testing.T) { - t.Parallel() - env := s.NewTestWorkflowEnvironment() - e2e.RegisterWorkflowsAndActivities(env) - - srcTableName := s.attachSchemaSuffix("test_types_sf_avro_cdc") - dstTableName := fmt.Sprintf("%s.%s", s.sfHelper.testSchemaName, "test_types_sf_avro_cdc") - - _, err := s.pool.Exec(context.Background(), fmt.Sprintf(` - CREATE TABLE IF NOT EXISTS %s (id serial PRIMARY KEY,c1 BIGINT,c2 BIT,c3 VARBIT,c4 BOOLEAN, - c6 BYTEA,c7 CHARACTER,c8 varchar,c9 CIDR,c11 DATE,c12 FLOAT,c13 DOUBLE PRECISION, - c14 INET,c15 INTEGER,c16 INTERVAL,c17 JSON,c18 JSONB,c21 MACADDR,c22 MONEY, - c23 NUMERIC,c24 OID,c28 REAL,c29 SMALLINT,c30 SMALLSERIAL,c31 SERIAL,c32 TEXT, - c33 TIMESTAMP,c34 TIMESTAMPTZ,c35 TIME, c36 TIMETZ,c37 TSQUERY,c38 TSVECTOR, - c39 TXID_SNAPSHOT,c40 UUID,c41 XML, c42 GEOMETRY(POINT), c43 GEOGRAPHY(POINT), - c44 GEOGRAPHY(POLYGON), c45 GEOGRAPHY(LINESTRING), c46 GEOMETRY(LINESTRING), c47 GEOMETRY(POLYGON)); - `, srcTableName)) - require.NoError(t, err) - connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_types_sf"), TableNameMapping: map[string]string{srcTableName: dstTableName}, @@ -842,7 +697,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Types_SF_Avro_CDC(t *testing.T) { require.NoError(t, err) limits := peerflow.CDCFlowLimits{ - TotalSyncFlows: 1, + TotalSyncFlows: 2, MaxBatchSize: 100, } @@ -878,7 +733,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Types_SF_Avro_CDC(t *testing.T) { s.Error(err) s.Contains(err.Error(), "continue as new") - noNulls, err := s.sfHelper.CheckNull("test_types_sf_avro_cdc", []string{"c41", "c1", "c2", "c3", "c4", + noNulls, err := s.sfHelper.CheckNull("test_types_sf", []string{"c41", "c1", "c2", "c3", "c4", "c6", "c39", "c40", "id", "c9", "c11", "c12", "c13", "c14", "c15", "c16", "c17", "c18", "c21", "c22", "c23", "c24", "c28", "c29", "c30", "c31", "c33", "c34", "c35", "c36", "c37", "c38", "c7", "c8", "c32", "c42", "c43", "c44", "c45", "c46"}) @@ -912,13 +767,14 @@ func (s 
*PeerFlowE2ETestSuiteSF) Test_Multi_Table_SF(t *testing.T) { TableNameMapping: map[string]string{srcTable1Name: dstTable1Name, srcTable2Name: dstTable2Name}, PostgresPort: e2e.PostgresPort, Destination: s.sfHelper.Peer, + CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() require.NoError(t, err) limits := peerflow.CDCFlowLimits{ - TotalSyncFlows: 1, + TotalSyncFlows: 2, MaxBatchSize: 100, } @@ -972,6 +828,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF(t *testing.T) { TableNameMapping: map[string]string{srcTableName: dstTableName}, PostgresPort: e2e.PostgresPort, Destination: s.sfHelper.Peer, + CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() @@ -1137,6 +994,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Composite_PKey_SF(t *testing.T) { TableNameMapping: map[string]string{srcTableName: dstTableName}, PostgresPort: e2e.PostgresPort, Destination: s.sfHelper.Peer, + CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() @@ -1213,6 +1071,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_1_SF(t *testing.T) { TableNameMapping: map[string]string{srcTableName: dstTableName}, PostgresPort: e2e.PostgresPort, Destination: s.sfHelper.Peer, + CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() @@ -1291,6 +1150,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_2_SF(t *testing.T) { TableNameMapping: map[string]string{srcTableName: dstTableName}, PostgresPort: e2e.PostgresPort, Destination: s.sfHelper.Peer, + CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() diff --git a/flow/e2e/snowflake/qrep_flow_sf_test.go b/flow/e2e/snowflake/qrep_flow_sf_test.go index 82901beac2..c1516ca8df 100644 --- a/flow/e2e/snowflake/qrep_flow_sf_test.go +++ b/flow/e2e/snowflake/qrep_flow_sf_test.go @@ -81,7 +81,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF() { ) s.NoError(err) - e2e.RunQrepFlowWorkflow(env, qrepConfig) + e2e.RunQrepFlowWorkflow(s.WorkflowTestSuite, qrepConfig) // Verify workflow completes without error s.True(env.IsWorkflowCompleted()) @@ -126,7 +126,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_Upsert_Simple() } s.NoError(err) - e2e.RunQrepFlowWorkflow(env, qrepConfig) + e2e.RunQrepFlowWorkflow(s.WorkflowTestSuite, qrepConfig) // Verify workflow completes without error s.True(env.IsWorkflowCompleted()) @@ -168,7 +168,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_S3() { s.NoError(err) qrepConfig.StagingPath = fmt.Sprintf("s3://peerdb-test-bucket/avro/%s", uuid.New()) - e2e.RunQrepFlowWorkflow(env, qrepConfig) + e2e.RunQrepFlowWorkflow(s.WorkflowTestSuite, qrepConfig) // Verify workflow completes without error s.True(env.IsWorkflowCompleted()) @@ -212,7 +212,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_Upsert_XMIN() { qrepConfig.WatermarkColumn = "xmin" s.NoError(err) - e2e.RunQrepFlowWorkflow(env, qrepConfig) + e2e.RunQrepFlowWorkflow(s.WorkflowTestSuite, qrepConfig) // Verify workflow completes without error s.True(env.IsWorkflowCompleted()) @@ -256,7 +256,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_S3_Integration( s.NoError(err) 
qrepConfig.StagingPath = fmt.Sprintf("s3://peerdb-test-bucket/avro/%s", uuid.New()) - e2e.RunQrepFlowWorkflow(env, qrepConfig) + e2e.RunQrepFlowWorkflow(s.WorkflowTestSuite, qrepConfig) // Verify workflow completes without error s.True(env.IsWorkflowCompleted()) diff --git a/flow/e2e/sqlserver/qrep_flow_sqlserver_test.go b/flow/e2e/sqlserver/qrep_flow_sqlserver_test.go index 9c2b27bcb0..6a70377ac4 100644 --- a/flow/e2e/sqlserver/qrep_flow_sqlserver_test.go +++ b/flow/e2e/sqlserver/qrep_flow_sqlserver_test.go @@ -168,7 +168,7 @@ func (s *PeerFlowE2ETestSuiteSQLServer) Test_Complete_QRep_Flow_SqlServer_Append WaitBetweenBatchesSeconds: 5, } - e2e.RunQrepFlowWorkflow(env, qrepConfig) + e2e.RunQrepFlowWorkflow(s.WorkflowTestSuite, qrepConfig) // Verify workflow completes without error s.True(env.IsWorkflowCompleted()) diff --git a/flow/e2e/test_utils.go b/flow/e2e/test_utils.go index f26afc2ee1..d307039cdd 100644 --- a/flow/e2e/test_utils.go +++ b/flow/e2e/test_utils.go @@ -294,14 +294,21 @@ func CreateQRepWorkflowConfig( return nil, err } - qrepConfig.InitialCopyOnly = true - return qrepConfig, nil } -func RunQrepFlowWorkflow(env *testsuite.TestWorkflowEnvironment, config *protos.QRepConfig) { - state := peerflow.NewQRepFlowState() +func RunQrepFlowWorkflow(suite testsuite.WorkflowTestSuite, config *protos.QRepConfig) bool { + env := suite.NewTestWorkflowEnvironment() + RegisterWorkflowsAndActivities(env) + state := peerflow.NewQRepFlowStateForTesting() + env.ExecuteWorkflow(peerflow.QRepFlowWorkflow, config, state) + if !env.IsWorkflowCompleted() { + return false + } + env = suite.NewTestWorkflowEnvironment() + RegisterWorkflowsAndActivities(env) env.ExecuteWorkflow(peerflow.QRepFlowWorkflow, config, state) + return env.IsWorkflowCompleted() } func GetOwnersSchema() *model.QRecordSchema { diff --git a/flow/generated/protos/flow.pb.go b/flow/generated/protos/flow.pb.go index df610088a1..bf29f33454 100644 --- a/flow/generated/protos/flow.pb.go +++ b/flow/generated/protos/flow.pb.go @@ -2659,8 +2659,8 @@ type QRepConfig struct { // This is only used when sync_mode is AVRO // this is the location where the avro files will be written // if this starts with gs:// then it will be written to GCS - // if this starts with s3:// then it will be written to S3 - // if nothing is specified then it will be written to local disk, only supported in Snowflake + // if this starts with s3:// then it will be written to S3, only supported in Snowflake + // if nothing is specified then it will be written to local disk // if using GCS or S3 make sure your instance has the correct permissions. 
StagingPath string `protobuf:"bytes,15,opt,name=staging_path,json=stagingPath,proto3" json:"staging_path,omitempty"` // This setting overrides batch_size_int and batch_duration_seconds @@ -3225,6 +3225,7 @@ type QRepFlowState struct { LastPartition *QRepPartition `protobuf:"bytes,1,opt,name=last_partition,json=lastPartition,proto3" json:"last_partition,omitempty"` NumPartitionsProcessed uint64 `protobuf:"varint,2,opt,name=num_partitions_processed,json=numPartitionsProcessed,proto3" json:"num_partitions_processed,omitempty"` NeedsResync bool `protobuf:"varint,3,opt,name=needs_resync,json=needsResync,proto3" json:"needs_resync,omitempty"` + DisableWaitForNewRows bool `protobuf:"varint,4,opt,name=disable_wait_for_new_rows,json=disableWaitForNewRows,proto3" json:"disable_wait_for_new_rows,omitempty"` } func (x *QRepFlowState) Reset() { @@ -3280,6 +3281,13 @@ func (x *QRepFlowState) GetNeedsResync() bool { return false } +func (x *QRepFlowState) GetDisableWaitForNewRows() bool { + if x != nil { + return x.DisableWaitForNewRows + } + return false +} + var File_flow_proto protoreflect.FileDescriptor var file_flow_proto_rawDesc = []byte{ @@ -3941,7 +3949,7 @@ var file_flow_proto_rawDesc = []byte{ 0x64, 0x65, 0x6c, 0x74, 0x61, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1d, 0x2e, 0x70, 0x65, 0x65, 0x72, 0x64, 0x62, 0x5f, 0x66, 0x6c, 0x6f, 0x77, 0x2e, 0x54, 0x61, 0x62, 0x6c, 0x65, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x52, 0x11, 0x74, 0x61, 0x62, - 0x6c, 0x65, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x73, 0x22, 0xaf, + 0x6c, 0x65, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x73, 0x22, 0xe9, 0x01, 0x0a, 0x0d, 0x51, 0x52, 0x65, 0x70, 0x46, 0x6c, 0x6f, 0x77, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x41, 0x0a, 0x0e, 0x6c, 0x61, 0x73, 0x74, 0x5f, 0x70, 0x61, 0x72, 0x74, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x70, 0x65, 0x65, 0x72, 0x64, @@ -3953,26 +3961,30 @@ var file_flow_proto_rawDesc = []byte{ 0x69, 0x6f, 0x6e, 0x73, 0x50, 0x72, 0x6f, 0x63, 0x65, 0x73, 0x73, 0x65, 0x64, 0x12, 0x21, 0x0a, 0x0c, 0x6e, 0x65, 0x65, 0x64, 0x73, 0x5f, 0x72, 0x65, 0x73, 0x79, 0x6e, 0x63, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0b, 0x6e, 0x65, 0x65, 0x64, 0x73, 0x52, 0x65, 0x73, 0x79, 0x6e, 0x63, - 0x2a, 0x50, 0x0a, 0x0c, 0x51, 0x52, 0x65, 0x70, 0x53, 0x79, 0x6e, 0x63, 0x4d, 0x6f, 0x64, 0x65, - 0x12, 0x1f, 0x0a, 0x1b, 0x51, 0x52, 0x45, 0x50, 0x5f, 0x53, 0x59, 0x4e, 0x43, 0x5f, 0x4d, 0x4f, - 0x44, 0x45, 0x5f, 0x4d, 0x55, 0x4c, 0x54, 0x49, 0x5f, 0x49, 0x4e, 0x53, 0x45, 0x52, 0x54, 0x10, - 0x00, 0x12, 0x1f, 0x0a, 0x1b, 0x51, 0x52, 0x45, 0x50, 0x5f, 0x53, 0x59, 0x4e, 0x43, 0x5f, 0x4d, - 0x4f, 0x44, 0x45, 0x5f, 0x53, 0x54, 0x4f, 0x52, 0x41, 0x47, 0x45, 0x5f, 0x41, 0x56, 0x52, 0x4f, - 0x10, 0x01, 0x2a, 0x66, 0x0a, 0x0d, 0x51, 0x52, 0x65, 0x70, 0x57, 0x72, 0x69, 0x74, 0x65, 0x54, - 0x79, 0x70, 0x65, 0x12, 0x1a, 0x0a, 0x16, 0x51, 0x52, 0x45, 0x50, 0x5f, 0x57, 0x52, 0x49, 0x54, - 0x45, 0x5f, 0x4d, 0x4f, 0x44, 0x45, 0x5f, 0x41, 0x50, 0x50, 0x45, 0x4e, 0x44, 0x10, 0x00, 0x12, - 0x1a, 0x0a, 0x16, 0x51, 0x52, 0x45, 0x50, 0x5f, 0x57, 0x52, 0x49, 0x54, 0x45, 0x5f, 0x4d, 0x4f, - 0x44, 0x45, 0x5f, 0x55, 0x50, 0x53, 0x45, 0x52, 0x54, 0x10, 0x01, 0x12, 0x1d, 0x0a, 0x19, 0x51, - 0x52, 0x45, 0x50, 0x5f, 0x57, 0x52, 0x49, 0x54, 0x45, 0x5f, 0x4d, 0x4f, 0x44, 0x45, 0x5f, 0x4f, - 0x56, 0x45, 0x52, 0x57, 0x52, 0x49, 0x54, 0x45, 0x10, 0x02, 0x42, 0x76, 0x0a, 0x0f, 0x63, 0x6f, - 0x6d, 0x2e, 0x70, 0x65, 0x65, 0x72, 0x64, 
0x62, 0x5f, 0x66, 0x6c, 0x6f, 0x77, 0x42, 0x09, 0x46, - 0x6c, 0x6f, 0x77, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x50, 0x01, 0x5a, 0x10, 0x67, 0x65, 0x6e, 0x65, - 0x72, 0x61, 0x74, 0x65, 0x64, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x73, 0xa2, 0x02, 0x03, 0x50, - 0x58, 0x58, 0xaa, 0x02, 0x0a, 0x50, 0x65, 0x65, 0x72, 0x64, 0x62, 0x46, 0x6c, 0x6f, 0x77, 0xca, - 0x02, 0x0a, 0x50, 0x65, 0x65, 0x72, 0x64, 0x62, 0x46, 0x6c, 0x6f, 0x77, 0xe2, 0x02, 0x16, 0x50, - 0x65, 0x65, 0x72, 0x64, 0x62, 0x46, 0x6c, 0x6f, 0x77, 0x5c, 0x47, 0x50, 0x42, 0x4d, 0x65, 0x74, - 0x61, 0x64, 0x61, 0x74, 0x61, 0xea, 0x02, 0x0a, 0x50, 0x65, 0x65, 0x72, 0x64, 0x62, 0x46, 0x6c, - 0x6f, 0x77, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x12, 0x38, 0x0a, 0x19, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x5f, 0x77, 0x61, 0x69, 0x74, + 0x5f, 0x66, 0x6f, 0x72, 0x5f, 0x6e, 0x65, 0x77, 0x5f, 0x72, 0x6f, 0x77, 0x73, 0x18, 0x04, 0x20, + 0x01, 0x28, 0x08, 0x52, 0x15, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x57, 0x61, 0x69, 0x74, + 0x46, 0x6f, 0x72, 0x4e, 0x65, 0x77, 0x52, 0x6f, 0x77, 0x73, 0x2a, 0x50, 0x0a, 0x0c, 0x51, 0x52, + 0x65, 0x70, 0x53, 0x79, 0x6e, 0x63, 0x4d, 0x6f, 0x64, 0x65, 0x12, 0x1f, 0x0a, 0x1b, 0x51, 0x52, + 0x45, 0x50, 0x5f, 0x53, 0x59, 0x4e, 0x43, 0x5f, 0x4d, 0x4f, 0x44, 0x45, 0x5f, 0x4d, 0x55, 0x4c, + 0x54, 0x49, 0x5f, 0x49, 0x4e, 0x53, 0x45, 0x52, 0x54, 0x10, 0x00, 0x12, 0x1f, 0x0a, 0x1b, 0x51, + 0x52, 0x45, 0x50, 0x5f, 0x53, 0x59, 0x4e, 0x43, 0x5f, 0x4d, 0x4f, 0x44, 0x45, 0x5f, 0x53, 0x54, + 0x4f, 0x52, 0x41, 0x47, 0x45, 0x5f, 0x41, 0x56, 0x52, 0x4f, 0x10, 0x01, 0x2a, 0x66, 0x0a, 0x0d, + 0x51, 0x52, 0x65, 0x70, 0x57, 0x72, 0x69, 0x74, 0x65, 0x54, 0x79, 0x70, 0x65, 0x12, 0x1a, 0x0a, + 0x16, 0x51, 0x52, 0x45, 0x50, 0x5f, 0x57, 0x52, 0x49, 0x54, 0x45, 0x5f, 0x4d, 0x4f, 0x44, 0x45, + 0x5f, 0x41, 0x50, 0x50, 0x45, 0x4e, 0x44, 0x10, 0x00, 0x12, 0x1a, 0x0a, 0x16, 0x51, 0x52, 0x45, + 0x50, 0x5f, 0x57, 0x52, 0x49, 0x54, 0x45, 0x5f, 0x4d, 0x4f, 0x44, 0x45, 0x5f, 0x55, 0x50, 0x53, + 0x45, 0x52, 0x54, 0x10, 0x01, 0x12, 0x1d, 0x0a, 0x19, 0x51, 0x52, 0x45, 0x50, 0x5f, 0x57, 0x52, + 0x49, 0x54, 0x45, 0x5f, 0x4d, 0x4f, 0x44, 0x45, 0x5f, 0x4f, 0x56, 0x45, 0x52, 0x57, 0x52, 0x49, + 0x54, 0x45, 0x10, 0x02, 0x42, 0x76, 0x0a, 0x0f, 0x63, 0x6f, 0x6d, 0x2e, 0x70, 0x65, 0x65, 0x72, + 0x64, 0x62, 0x5f, 0x66, 0x6c, 0x6f, 0x77, 0x42, 0x09, 0x46, 0x6c, 0x6f, 0x77, 0x50, 0x72, 0x6f, + 0x74, 0x6f, 0x50, 0x01, 0x5a, 0x10, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x64, 0x2f, + 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x73, 0xa2, 0x02, 0x03, 0x50, 0x58, 0x58, 0xaa, 0x02, 0x0a, 0x50, + 0x65, 0x65, 0x72, 0x64, 0x62, 0x46, 0x6c, 0x6f, 0x77, 0xca, 0x02, 0x0a, 0x50, 0x65, 0x65, 0x72, + 0x64, 0x62, 0x46, 0x6c, 0x6f, 0x77, 0xe2, 0x02, 0x16, 0x50, 0x65, 0x65, 0x72, 0x64, 0x62, 0x46, + 0x6c, 0x6f, 0x77, 0x5c, 0x47, 0x50, 0x42, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0xea, + 0x02, 0x0a, 0x50, 0x65, 0x65, 0x72, 0x64, 0x62, 0x46, 0x6c, 0x6f, 0x77, 0x62, 0x06, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/flow/go.sum b/flow/go.sum index 3122b24711..1e3fe7ff88 100644 --- a/flow/go.sum +++ b/flow/go.sum @@ -289,6 +289,8 @@ github.com/klauspost/asmfmt v1.3.2 h1:4Ri7ox3EwapiOjCki+hw14RyKk201CN4rzyCJRFLpK github.com/klauspost/asmfmt v1.3.2/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j0HLHbNSE= github.com/klauspost/compress v1.17.3 h1:qkRjuerhUU1EmXLYGkSH6EZL+vPSxIrYjLNAK4slzwA= github.com/klauspost/compress v1.17.3/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= +github.com/klauspost/compress v1.17.3 
h1:qkRjuerhUU1EmXLYGkSH6EZL+vPSxIrYjLNAK4slzwA= +github.com/klauspost/compress v1.17.3/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= github.com/klauspost/cpuid/v2 v2.2.6 h1:ndNyv040zDGIDh8thGkXYjnFtiN02M1PVVF+JE/48xc= github.com/klauspost/cpuid/v2 v2.2.6/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= diff --git a/flow/workflows/qrep_flow.go b/flow/workflows/qrep_flow.go index f20d17951e..3b8e77a686 100644 --- a/flow/workflows/qrep_flow.go +++ b/flow/workflows/qrep_flow.go @@ -34,7 +34,7 @@ type QRepPartitionFlowExecution struct { runUUID string } -// returns a new empty PeerFlowState +// returns a new empty QRepFlowState func NewQRepFlowState() *protos.QRepFlowState { return &protos.QRepFlowState{ LastPartition: &protos.QRepPartition{ @@ -46,6 +46,19 @@ func NewQRepFlowState() *protos.QRepFlowState { } } +// returns a new empty QRepFlowState +func NewQRepFlowStateForTesting() *protos.QRepFlowState { + return &protos.QRepFlowState{ + LastPartition: &protos.QRepPartition{ + PartitionId: "not-applicable-partition", + Range: nil, + }, + NumPartitionsProcessed: 0, + NeedsResync: true, + DisableWaitForNewRows: true, + } +} + // NewQRepFlowExecution creates a new instance of QRepFlowExecution. func NewQRepFlowExecution(ctx workflow.Context, config *protos.QRepConfig, runUUID string) *QRepFlowExecution { return &QRepFlowExecution{ @@ -440,10 +453,12 @@ func QRepFlowWorkflow( state.LastPartition = partitions.Partitions[len(partitions.Partitions)-1] } - // sleep for a while and continue the workflow - err = q.waitForNewRows(ctx, state.LastPartition) - if err != nil { - return err + if !state.DisableWaitForNewRows { + // sleep for a while and continue the workflow + err = q.waitForNewRows(ctx, state.LastPartition) + if err != nil { + return err + } } workflow.GetLogger(ctx).Info("Continuing as new workflow", diff --git a/nexus/pt/src/peerdb_flow.rs b/nexus/pt/src/peerdb_flow.rs index 67ba78e80e..dc308131e6 100644 --- a/nexus/pt/src/peerdb_flow.rs +++ b/nexus/pt/src/peerdb_flow.rs @@ -457,8 +457,8 @@ pub struct QRepConfig { /// This is only used when sync_mode is AVRO /// this is the location where the avro files will be written /// if this starts with gs:// then it will be written to GCS - /// if this starts with s3:// then it will be written to S3 - /// if nothing is specified then it will be written to local disk, only supported in Snowflake + /// if this starts with s3:// then it will be written to S3, only supported in Snowflake + /// if nothing is specified then it will be written to local disk /// if using GCS or S3 make sure your instance has the correct permissions. 
#[prost(string, tag="15")] pub staging_path: ::prost::alloc::string::String, @@ -540,6 +540,8 @@ pub struct QRepFlowState { pub num_partitions_processed: u64, #[prost(bool, tag="3")] pub needs_resync: bool, + #[prost(bool, tag="4")] + pub disable_wait_for_new_rows: bool, } /// protos for qrep #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] diff --git a/nexus/pt/src/peerdb_flow.serde.rs b/nexus/pt/src/peerdb_flow.serde.rs index ebcd1ffe57..0436bf3345 100644 --- a/nexus/pt/src/peerdb_flow.serde.rs +++ b/nexus/pt/src/peerdb_flow.serde.rs @@ -3004,6 +3004,9 @@ impl serde::Serialize for QRepFlowState { if self.needs_resync { len += 1; } + if self.disable_wait_for_new_rows { + len += 1; + } let mut struct_ser = serializer.serialize_struct("peerdb_flow.QRepFlowState", len)?; if let Some(v) = self.last_partition.as_ref() { struct_ser.serialize_field("lastPartition", v)?; @@ -3014,6 +3017,9 @@ impl serde::Serialize for QRepFlowState { if self.needs_resync { struct_ser.serialize_field("needsResync", &self.needs_resync)?; } + if self.disable_wait_for_new_rows { + struct_ser.serialize_field("disableWaitForNewRows", &self.disable_wait_for_new_rows)?; + } struct_ser.end() } } @@ -3030,6 +3036,8 @@ impl<'de> serde::Deserialize<'de> for QRepFlowState { "numPartitionsProcessed", "needs_resync", "needsResync", + "disable_wait_for_new_rows", + "disableWaitForNewRows", ]; #[allow(clippy::enum_variant_names)] @@ -3037,6 +3045,7 @@ impl<'de> serde::Deserialize<'de> for QRepFlowState { LastPartition, NumPartitionsProcessed, NeedsResync, + DisableWaitForNewRows, __SkipField__, } impl<'de> serde::Deserialize<'de> for GeneratedField { @@ -3062,6 +3071,7 @@ impl<'de> serde::Deserialize<'de> for QRepFlowState { "lastPartition" | "last_partition" => Ok(GeneratedField::LastPartition), "numPartitionsProcessed" | "num_partitions_processed" => Ok(GeneratedField::NumPartitionsProcessed), "needsResync" | "needs_resync" => Ok(GeneratedField::NeedsResync), + "disableWaitForNewRows" | "disable_wait_for_new_rows" => Ok(GeneratedField::DisableWaitForNewRows), _ => Ok(GeneratedField::__SkipField__), } } @@ -3084,6 +3094,7 @@ impl<'de> serde::Deserialize<'de> for QRepFlowState { let mut last_partition__ = None; let mut num_partitions_processed__ = None; let mut needs_resync__ = None; + let mut disable_wait_for_new_rows__ = None; while let Some(k) = map.next_key()? 
{ match k { GeneratedField::LastPartition => { @@ -3106,6 +3117,12 @@ impl<'de> serde::Deserialize<'de> for QRepFlowState { } needs_resync__ = Some(map.next_value()?); } + GeneratedField::DisableWaitForNewRows => { + if disable_wait_for_new_rows__.is_some() { + return Err(serde::de::Error::duplicate_field("disableWaitForNewRows")); + } + disable_wait_for_new_rows__ = Some(map.next_value()?); + } GeneratedField::__SkipField__ => { let _ = map.next_value::()?; } @@ -3115,6 +3132,7 @@ impl<'de> serde::Deserialize<'de> for QRepFlowState { last_partition: last_partition__, num_partitions_processed: num_partitions_processed__.unwrap_or_default(), needs_resync: needs_resync__.unwrap_or_default(), + disable_wait_for_new_rows: disable_wait_for_new_rows__.unwrap_or_default(), }) } } diff --git a/protos/flow.proto b/protos/flow.proto index 6289f993f2..281f609993 100644 --- a/protos/flow.proto +++ b/protos/flow.proto @@ -307,8 +307,8 @@ message QRepConfig { // This is only used when sync_mode is AVRO // this is the location where the avro files will be written // if this starts with gs:// then it will be written to GCS - // if this starts with s3:// then it will be written to S3 - // if nothing is specified then it will be written to local disk, only supported in Snowflake + // if this starts with s3:// then it will be written to S3, only supported in Snowflake + // if nothing is specified then it will be written to local disk // if using GCS or S3 make sure your instance has the correct permissions. string staging_path = 15; @@ -364,4 +364,5 @@ message QRepFlowState { QRepPartition last_partition = 1; uint64 num_partitions_processed = 2; bool needs_resync = 3; + bool disable_wait_for_new_rows = 4; } diff --git a/ui/grpc_generated/flow.ts b/ui/grpc_generated/flow.ts index 95118814f5..3e8f36e97f 100644 --- a/ui/grpc_generated/flow.ts +++ b/ui/grpc_generated/flow.ts @@ -416,8 +416,8 @@ export interface QRepConfig { * This is only used when sync_mode is AVRO * this is the location where the avro files will be written * if this starts with gs:// then it will be written to GCS - * if this starts with s3:// then it will be written to S3 - * if nothing is specified then it will be written to local disk, only supported in Snowflake + * if this starts with s3:// then it will be written to S3, only supported in Snowflake + * if nothing is specified then it will be written to local disk * if using GCS or S3 make sure your instance has the correct permissions. 
*/ stagingPath: string; @@ -475,6 +475,7 @@ export interface QRepFlowState { lastPartition: QRepPartition | undefined; numPartitionsProcessed: number; needsResync: boolean; + disableWaitForNewRows: boolean; } function createBaseTableNameMapping(): TableNameMapping { @@ -6155,7 +6156,7 @@ export const ReplayTableSchemaDeltaInput = { }; function createBaseQRepFlowState(): QRepFlowState { - return { lastPartition: undefined, numPartitionsProcessed: 0, needsResync: false }; + return { lastPartition: undefined, numPartitionsProcessed: 0, needsResync: false, disableWaitForNewRows: false }; } export const QRepFlowState = { @@ -6169,6 +6170,9 @@ export const QRepFlowState = { if (message.needsResync === true) { writer.uint32(24).bool(message.needsResync); } + if (message.disableWaitForNewRows === true) { + writer.uint32(32).bool(message.disableWaitForNewRows); + } return writer; }, @@ -6200,6 +6204,13 @@ export const QRepFlowState = { message.needsResync = reader.bool(); continue; + case 4: + if (tag !== 32) { + break; + } + + message.disableWaitForNewRows = reader.bool(); + continue; } if ((tag & 7) === 4 || tag === 0) { break; @@ -6214,6 +6225,7 @@ export const QRepFlowState = { lastPartition: isSet(object.lastPartition) ? QRepPartition.fromJSON(object.lastPartition) : undefined, numPartitionsProcessed: isSet(object.numPartitionsProcessed) ? Number(object.numPartitionsProcessed) : 0, needsResync: isSet(object.needsResync) ? Boolean(object.needsResync) : false, + disableWaitForNewRows: isSet(object.disableWaitForNewRows) ? Boolean(object.disableWaitForNewRows) : false, }; }, @@ -6228,6 +6240,9 @@ export const QRepFlowState = { if (message.needsResync === true) { obj.needsResync = message.needsResync; } + if (message.disableWaitForNewRows === true) { + obj.disableWaitForNewRows = message.disableWaitForNewRows; + } return obj; }, @@ -6241,6 +6256,7 @@ export const QRepFlowState = { : undefined; message.numPartitionsProcessed = object.numPartitionsProcessed ?? 0; message.needsResync = object.needsResync ?? false; + message.disableWaitForNewRows = object.disableWaitForNewRows ?? 
false; return message; }, }; From c57a5242a5d0053efa2559af17e59279b5cc4a74 Mon Sep 17 00:00:00 2001 From: Kevin Biju Date: Fri, 17 Nov 2023 00:49:26 +0530 Subject: [PATCH 02/10] trying DEFLATE and delaying QRep --- flow/connectors/bigquery/qrep_avro_sync.go | 3 ++- flow/e2e/bigquery/qrep_flow_bq_test.go | 2 +- flow/e2e/postgres/qrep_flow_pg_test.go | 2 +- flow/e2e/s3/qrep_flow_s3_test.go | 4 ++-- flow/e2e/snowflake/qrep_flow_sf_test.go | 10 +++++----- flow/e2e/sqlserver/qrep_flow_sqlserver_test.go | 2 +- flow/e2e/test_utils.go | 12 ++---------- 7 files changed, 14 insertions(+), 21 deletions(-) diff --git a/flow/connectors/bigquery/qrep_avro_sync.go b/flow/connectors/bigquery/qrep_avro_sync.go index 7a44352fc0..e0c4da5556 100644 --- a/flow/connectors/bigquery/qrep_avro_sync.go +++ b/flow/connectors/bigquery/qrep_avro_sync.go @@ -328,7 +328,7 @@ func (s *QRepAvroSyncMethod) writeToStage( var avroFilePath string numRecords, err := func() (int, error) { ocfWriter := avro.NewPeerDBOCFWriter(s.connector.ctx, stream, avroSchema, - avro.CompressSnappy, qvalue.QDWHTypeBigQuery) + avro.CompressDeflate, qvalue.QDWHTypeBigQuery) if s.gcsBucket != "" { bucket := s.connector.storageClient.Bucket(s.gcsBucket) avroFilePath = fmt.Sprintf("%s/%s.avro.snappy", objectFolder, syncID) @@ -370,6 +370,7 @@ func (s *QRepAvroSyncMethod) writeToStage( if s.gcsBucket != "" { gcsRef := bigquery.NewGCSReference(fmt.Sprintf("gs://%s/%s", s.gcsBucket, avroFilePath)) gcsRef.SourceFormat = bigquery.Avro + gcsRef.Compression = bigquery.Deflate avroRef = gcsRef } else { fh, err := os.Open(avroFilePath) diff --git a/flow/e2e/bigquery/qrep_flow_bq_test.go b/flow/e2e/bigquery/qrep_flow_bq_test.go index 9183762cde..e110b451da 100644 --- a/flow/e2e/bigquery/qrep_flow_bq_test.go +++ b/flow/e2e/bigquery/qrep_flow_bq_test.go @@ -68,7 +68,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Complete_QRep_Flow_Avro() { s.bqHelper.Peer, "peerdb_staging") s.NoError(err) - e2e.RunQrepFlowWorkflow(s.WorkflowTestSuite, qrepConfig) + e2e.RunQrepFlowWorkflow(env, qrepConfig) // Verify workflow completes without error s.True(env.IsWorkflowCompleted()) diff --git a/flow/e2e/postgres/qrep_flow_pg_test.go b/flow/e2e/postgres/qrep_flow_pg_test.go index 52386711cf..df1653b992 100644 --- a/flow/e2e/postgres/qrep_flow_pg_test.go +++ b/flow/e2e/postgres/qrep_flow_pg_test.go @@ -171,7 +171,7 @@ func (s *PeerFlowE2ETestSuitePG) Test_Complete_QRep_Flow_Multi_Insert_PG() { ) s.NoError(err) - e2e.RunQrepFlowWorkflow(s.WorkflowTestSuite, qrepConfig) + e2e.RunQrepFlowWorkflow(env, qrepConfig) // Verify workflow completes without error s.True(env.IsWorkflowCompleted()) diff --git a/flow/e2e/s3/qrep_flow_s3_test.go b/flow/e2e/s3/qrep_flow_s3_test.go index c3845f5f16..2fca18a700 100644 --- a/flow/e2e/s3/qrep_flow_s3_test.go +++ b/flow/e2e/s3/qrep_flow_s3_test.go @@ -114,7 +114,7 @@ func (s *PeerFlowE2ETestSuiteS3) Test_Complete_QRep_Flow_S3() { s.NoError(err) qrepConfig.StagingPath = s.s3Helper.s3Config.Url - e2e.RunQrepFlowWorkflow(s.WorkflowTestSuite, qrepConfig) + e2e.RunQrepFlowWorkflow(env, qrepConfig) // Verify workflow completes without error s.True(env.IsWorkflowCompleted()) @@ -164,7 +164,7 @@ func (s *PeerFlowE2ETestSuiteS3) Test_Complete_QRep_Flow_S3_CTID() { qrepConfig.InitialCopyOnly = true qrepConfig.WatermarkColumn = "ctid" - e2e.RunQrepFlowWorkflow(s.WorkflowTestSuite, qrepConfig) + e2e.RunQrepFlowWorkflow(env, qrepConfig) // Verify workflow completes without error s.True(env.IsWorkflowCompleted()) diff --git a/flow/e2e/snowflake/qrep_flow_sf_test.go 
b/flow/e2e/snowflake/qrep_flow_sf_test.go index c1516ca8df..82901beac2 100644 --- a/flow/e2e/snowflake/qrep_flow_sf_test.go +++ b/flow/e2e/snowflake/qrep_flow_sf_test.go @@ -81,7 +81,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF() { ) s.NoError(err) - e2e.RunQrepFlowWorkflow(s.WorkflowTestSuite, qrepConfig) + e2e.RunQrepFlowWorkflow(env, qrepConfig) // Verify workflow completes without error s.True(env.IsWorkflowCompleted()) @@ -126,7 +126,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_Upsert_Simple() } s.NoError(err) - e2e.RunQrepFlowWorkflow(s.WorkflowTestSuite, qrepConfig) + e2e.RunQrepFlowWorkflow(env, qrepConfig) // Verify workflow completes without error s.True(env.IsWorkflowCompleted()) @@ -168,7 +168,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_S3() { s.NoError(err) qrepConfig.StagingPath = fmt.Sprintf("s3://peerdb-test-bucket/avro/%s", uuid.New()) - e2e.RunQrepFlowWorkflow(s.WorkflowTestSuite, qrepConfig) + e2e.RunQrepFlowWorkflow(env, qrepConfig) // Verify workflow completes without error s.True(env.IsWorkflowCompleted()) @@ -212,7 +212,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_Upsert_XMIN() { qrepConfig.WatermarkColumn = "xmin" s.NoError(err) - e2e.RunQrepFlowWorkflow(s.WorkflowTestSuite, qrepConfig) + e2e.RunQrepFlowWorkflow(env, qrepConfig) // Verify workflow completes without error s.True(env.IsWorkflowCompleted()) @@ -256,7 +256,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_S3_Integration( s.NoError(err) qrepConfig.StagingPath = fmt.Sprintf("s3://peerdb-test-bucket/avro/%s", uuid.New()) - e2e.RunQrepFlowWorkflow(s.WorkflowTestSuite, qrepConfig) + e2e.RunQrepFlowWorkflow(env, qrepConfig) // Verify workflow completes without error s.True(env.IsWorkflowCompleted()) diff --git a/flow/e2e/sqlserver/qrep_flow_sqlserver_test.go b/flow/e2e/sqlserver/qrep_flow_sqlserver_test.go index 6a70377ac4..9c2b27bcb0 100644 --- a/flow/e2e/sqlserver/qrep_flow_sqlserver_test.go +++ b/flow/e2e/sqlserver/qrep_flow_sqlserver_test.go @@ -168,7 +168,7 @@ func (s *PeerFlowE2ETestSuiteSQLServer) Test_Complete_QRep_Flow_SqlServer_Append WaitBetweenBatchesSeconds: 5, } - e2e.RunQrepFlowWorkflow(s.WorkflowTestSuite, qrepConfig) + e2e.RunQrepFlowWorkflow(env, qrepConfig) // Verify workflow completes without error s.True(env.IsWorkflowCompleted()) diff --git a/flow/e2e/test_utils.go b/flow/e2e/test_utils.go index d307039cdd..26db73f765 100644 --- a/flow/e2e/test_utils.go +++ b/flow/e2e/test_utils.go @@ -297,18 +297,10 @@ func CreateQRepWorkflowConfig( return qrepConfig, nil } -func RunQrepFlowWorkflow(suite testsuite.WorkflowTestSuite, config *protos.QRepConfig) bool { - env := suite.NewTestWorkflowEnvironment() - RegisterWorkflowsAndActivities(env) +func RunQrepFlowWorkflow(env *testsuite.TestWorkflowEnvironment, config *protos.QRepConfig) { state := peerflow.NewQRepFlowStateForTesting() + time.Sleep(5 * time.Second) env.ExecuteWorkflow(peerflow.QRepFlowWorkflow, config, state) - if !env.IsWorkflowCompleted() { - return false - } - env = suite.NewTestWorkflowEnvironment() - RegisterWorkflowsAndActivities(env) - env.ExecuteWorkflow(peerflow.QRepFlowWorkflow, config, state) - return env.IsWorkflowCompleted() } func GetOwnersSchema() *model.QRecordSchema { From 298fab85d8f430399d1ce5256697d07d3824ddbc Mon Sep 17 00:00:00 2001 From: Kevin Biju Date: Fri, 17 Nov 2023 00:51:57 +0530 Subject: [PATCH 03/10] minor revert --- flow/e2e/test_utils.go | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/flow/e2e/test_utils.go b/flow/e2e/test_utils.go index 26db73f765..659b5249ac 100644 --- a/flow/e2e/test_utils.go +++ b/flow/e2e/test_utils.go @@ -298,7 +298,7 @@ func CreateQRepWorkflowConfig( } func RunQrepFlowWorkflow(env *testsuite.TestWorkflowEnvironment, config *protos.QRepConfig) { - state := peerflow.NewQRepFlowStateForTesting() + state := peerflow.NewQRepFlowState() time.Sleep(5 * time.Second) env.ExecuteWorkflow(peerflow.QRepFlowWorkflow, config, state) } From d4d5474bf1f36c1a2c23c1e0343460f2af65326c Mon Sep 17 00:00:00 2001 From: Kevin Biju Date: Fri, 17 Nov 2023 01:24:50 +0530 Subject: [PATCH 04/10] minor fix again --- flow/e2e/test_utils.go | 1 + 1 file changed, 1 insertion(+) diff --git a/flow/e2e/test_utils.go b/flow/e2e/test_utils.go index 659b5249ac..007f502c02 100644 --- a/flow/e2e/test_utils.go +++ b/flow/e2e/test_utils.go @@ -293,6 +293,7 @@ func CreateQRepWorkflowConfig( if err != nil { return nil, err } + qrepConfig.InitialCopyOnly = true return qrepConfig, nil } From a9c94da89b699bb9012aef69fc99ffcd055ebc1d Mon Sep 17 00:00:00 2001 From: Kevin Biju Date: Fri, 17 Nov 2023 01:25:54 +0530 Subject: [PATCH 05/10] minor lint --- flow/connectors/snowflake/qrep_avro_sync.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/flow/connectors/snowflake/qrep_avro_sync.go b/flow/connectors/snowflake/qrep_avro_sync.go index fcadb3298b..7d540c9f2f 100644 --- a/flow/connectors/snowflake/qrep_avro_sync.go +++ b/flow/connectors/snowflake/qrep_avro_sync.go @@ -274,7 +274,8 @@ func (s *SnowflakeAvroSyncMethod) writeToAvroFile( ) (int, string, error) { var numRecords int if s.config.StagingPath == "" { - ocfWriter := avro.NewPeerDBOCFWriter(s.connector.ctx, stream, avroSchema, avro.CompressZstd, qvalue.QDWHTypeSnowflake) + ocfWriter := avro.NewPeerDBOCFWriter(s.connector.ctx, stream, avroSchema, avro.CompressZstd, + qvalue.QDWHTypeSnowflake) tmpDir, err := os.MkdirTemp("", "peerdb-avro") if err != nil { return 0, "", fmt.Errorf("failed to create temp dir: %w", err) From 28d286138bb1b272452568db00477e2a3579f845 Mon Sep 17 00:00:00 2001 From: Kevin Biju Date: Fri, 17 Nov 2023 18:50:24 +0530 Subject: [PATCH 06/10] increase reading time and giving Snappy another chance --- .github/workflows/flow.yml | 2 +- flow/connectors/bigquery/qrep_avro_sync.go | 5 ++++- flow/e2e/snowflake/peer_flow_sf_test.go | 4 ++++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/flow.yml b/.github/workflows/flow.yml index e5df71cbf6..3085d14df4 100644 --- a/.github/workflows/flow.yml +++ b/.github/workflows/flow.yml @@ -120,4 +120,4 @@ jobs: PEERDB_CATALOG_USER: postgres PEERDB_CATALOG_PASSWORD: postgres PEERDB_CATALOG_DATABASE: postgres - PEERDB_CDC_IDLE_TIMEOUT_SECONDS: 3 + PEERDB_CDC_IDLE_TIMEOUT_SECONDS: 10 diff --git a/flow/connectors/bigquery/qrep_avro_sync.go b/flow/connectors/bigquery/qrep_avro_sync.go index e0c4da5556..c7ce8ce198 100644 --- a/flow/connectors/bigquery/qrep_avro_sync.go +++ b/flow/connectors/bigquery/qrep_avro_sync.go @@ -328,7 +328,7 @@ func (s *QRepAvroSyncMethod) writeToStage( var avroFilePath string numRecords, err := func() (int, error) { ocfWriter := avro.NewPeerDBOCFWriter(s.connector.ctx, stream, avroSchema, - avro.CompressDeflate, qvalue.QDWHTypeBigQuery) + avro.CompressSnappy, qvalue.QDWHTypeBigQuery) if s.gcsBucket != "" { bucket := s.connector.storageClient.Bucket(s.gcsBucket) avroFilePath = fmt.Sprintf("%s/%s.avro.snappy", objectFolder, syncID) @@ -360,6 +360,9 @@ func (s *QRepAvroSyncMethod) writeToStage( if 
err != nil { return 0, err } + if numRecords == 0 { + return 0, nil + } log.WithFields(log.Fields{ "batchOrPartitionID": syncID, }).Infof("wrote %d records to file %s", numRecords, avroFilePath) diff --git a/flow/e2e/snowflake/peer_flow_sf_test.go b/flow/e2e/snowflake/peer_flow_sf_test.go index 907da8ed89..49aee77b2b 100644 --- a/flow/e2e/snowflake/peer_flow_sf_test.go +++ b/flow/e2e/snowflake/peer_flow_sf_test.go @@ -66,6 +66,10 @@ func TestPeerFlowE2ETestSuiteSF(t *testing.T) { t.Run(tt.name, tt.test) } + + t.Cleanup(func() { + s.TearDownSuite() + }) } func (s *PeerFlowE2ETestSuiteSF) attachSchemaSuffix(tableName string) string { From 5e31c4603d56a73545b77c6b320787ca8e030fa8 Mon Sep 17 00:00:00 2001 From: Kevin Biju Date: Sat, 18 Nov 2023 15:35:20 +0530 Subject: [PATCH 07/10] no Snappy again --- flow/connectors/bigquery/qrep_avro_sync.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flow/connectors/bigquery/qrep_avro_sync.go b/flow/connectors/bigquery/qrep_avro_sync.go index c7ce8ce198..97c043ac04 100644 --- a/flow/connectors/bigquery/qrep_avro_sync.go +++ b/flow/connectors/bigquery/qrep_avro_sync.go @@ -328,10 +328,10 @@ func (s *QRepAvroSyncMethod) writeToStage( var avroFilePath string numRecords, err := func() (int, error) { ocfWriter := avro.NewPeerDBOCFWriter(s.connector.ctx, stream, avroSchema, - avro.CompressSnappy, qvalue.QDWHTypeBigQuery) + avro.CompressNone, qvalue.QDWHTypeBigQuery) if s.gcsBucket != "" { bucket := s.connector.storageClient.Bucket(s.gcsBucket) - avroFilePath = fmt.Sprintf("%s/%s.avro.snappy", objectFolder, syncID) + avroFilePath = fmt.Sprintf("%s/%s.avro", objectFolder, syncID) obj := bucket.Object(avroFilePath) w := obj.NewWriter(s.connector.ctx) @@ -346,7 +346,7 @@ func (s *QRepAvroSyncMethod) writeToStage( return 0, fmt.Errorf("failed to create temp dir: %w", err) } - avroFilePath = fmt.Sprintf("%s/%s.avro.snappy", tmpDir, syncID) + avroFilePath = fmt.Sprintf("%s/%s.avro", tmpDir, syncID) log.WithFields(log.Fields{ "batchOrPartitionID": syncID, }).Infof("writing records to local file %s", avroFilePath) From a9b982f35af857e31d7839670d9b91b5cf85251d Mon Sep 17 00:00:00 2001 From: Kevin Biju Date: Tue, 21 Nov 2023 23:00:22 +0530 Subject: [PATCH 08/10] unparallel your tests --- flow/connectors/bigquery/qrep_avro_sync.go | 4 +- flow/e2e/bigquery/peer_flow_bq_test.go | 234 ++++++++------------- flow/e2e/congen.go | 15 -- flow/e2e/snowflake/peer_flow_sf_test.go | 227 ++++++++------------ flow/model/qrecord_batch.go | 17 +- 5 files changed, 191 insertions(+), 306 deletions(-) diff --git a/flow/connectors/bigquery/qrep_avro_sync.go b/flow/connectors/bigquery/qrep_avro_sync.go index 97c043ac04..5b9b99b13d 100644 --- a/flow/connectors/bigquery/qrep_avro_sync.go +++ b/flow/connectors/bigquery/qrep_avro_sync.go @@ -331,7 +331,7 @@ func (s *QRepAvroSyncMethod) writeToStage( avro.CompressNone, qvalue.QDWHTypeBigQuery) if s.gcsBucket != "" { bucket := s.connector.storageClient.Bucket(s.gcsBucket) - avroFilePath = fmt.Sprintf("%s/%s.avro", objectFolder, syncID) + avroFilePath = fmt.Sprintf("%s/%s.avro.snappy", objectFolder, syncID) obj := bucket.Object(avroFilePath) w := obj.NewWriter(s.connector.ctx) @@ -346,7 +346,7 @@ func (s *QRepAvroSyncMethod) writeToStage( return 0, fmt.Errorf("failed to create temp dir: %w", err) } - avroFilePath = fmt.Sprintf("%s/%s.avro", tmpDir, syncID) + avroFilePath = fmt.Sprintf("%s/%s.avro.snappy", tmpDir, syncID) log.WithFields(log.Fields{ "batchOrPartitionID": syncID, }).Infof("writing records to local file %s", 
avroFilePath) diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go index 8d519b4f99..dd4954284b 100644 --- a/flow/e2e/bigquery/peer_flow_bq_test.go +++ b/flow/e2e/bigquery/peer_flow_bq_test.go @@ -5,6 +5,7 @@ import ( "fmt" "strings" "testing" + "time" "github.com/PeerDB-io/peer-flow/e2e" "github.com/PeerDB-io/peer-flow/generated/protos" @@ -28,45 +29,7 @@ type PeerFlowE2ETestSuiteBQ struct { } func TestPeerFlowE2ETestSuiteBQ(t *testing.T) { - s := &PeerFlowE2ETestSuiteBQ{} - s.SetT(t) - s.SetupSuite() - - tests := []struct { - name string - test func(t *testing.T) - }{ - {"Test_Invalid_Connection_Config", s.Test_Invalid_Connection_Config}, - {"Test_Complete_Flow_No_Data", s.Test_Complete_Flow_No_Data}, - {"Test_Char_ColType_Error", s.Test_Char_ColType_Error}, - {"Test_Complete_Simple_Flow_BQ", s.Test_Complete_Simple_Flow_BQ}, - {"Test_Toast_BQ", s.Test_Toast_BQ}, - {"Test_Toast_Nochanges_BQ", s.Test_Toast_Nochanges_BQ}, - {"Test_Toast_Advance_1_BQ", s.Test_Toast_Advance_1_BQ}, - {"Test_Toast_Advance_2_BQ", s.Test_Toast_Advance_2_BQ}, - {"Test_Toast_Advance_3_BQ", s.Test_Toast_Advance_3_BQ}, - {"Test_Types_BQ", s.Test_Types_BQ}, - {"Test_Multi_Table_BQ", s.Test_Multi_Table_BQ}, - {"Test_Simple_Schema_Changes_BQ", s.Test_Simple_Schema_Changes_BQ}, - {"Test_Composite_PKey_BQ", s.Test_Composite_PKey_BQ}, - {"Test_Composite_PKey_Toast_1_BQ", s.Test_Composite_PKey_Toast_1_BQ}, - {"Test_Composite_PKey_Toast_2_BQ", s.Test_Composite_PKey_Toast_2_BQ}, - } - - // Assert that there are no duplicate test names - testNames := make(map[string]bool) - for _, tt := range tests { - if testNames[tt.name] { - t.Fatalf("duplicate test name: %s", tt.name) - } - testNames[tt.name] = true - - t.Run(tt.name, tt.test) - } - - t.Cleanup(func() { - s.TearDownSuite() - }) + suite.Run(t, new(PeerFlowE2ETestSuiteBQ)) } func (s *PeerFlowE2ETestSuiteBQ) attachSchemaSuffix(tableName string) string { @@ -115,7 +78,9 @@ func (s *PeerFlowE2ETestSuiteBQ) SetupSuite() { s.setupTemporalLogger() - s.bqSuffix = strings.ToLower(util.RandomString(8)) + suffix := util.RandomString(8) + tsSuffix := time.Now().Format("20060102150405") + s.bqSuffix = fmt.Sprintf("bq_%s_%s", strings.ToLower(suffix), tsSuffix) pool, err := e2e.SetupPostgres(s.bqSuffix) if err != nil { s.Fail("failed to setup postgres", err) @@ -141,8 +106,7 @@ func (s *PeerFlowE2ETestSuiteBQ) TearDownSuite() { } } -func (s *PeerFlowE2ETestSuiteBQ) Test_Invalid_Connection_Config(t *testing.T) { - t.Parallel() +func (s *PeerFlowE2ETestSuiteBQ) Test_Invalid_Connection_Config() { env := s.NewTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(env) @@ -165,8 +129,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Invalid_Connection_Config(t *testing.T) { env.AssertExpectations(s.T()) } -func (s *PeerFlowE2ETestSuiteBQ) Test_Complete_Flow_No_Data(t *testing.T) { - t.Parallel() +func (s *PeerFlowE2ETestSuiteBQ) Test_Complete_Flow_No_Data() { env := s.NewTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(env) @@ -180,7 +143,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Complete_Flow_No_Data(t *testing.T) { value VARCHAR(255) NOT NULL ); `, srcTableName)) - require.NoError(t, err) + s.NoError(err) connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_complete_flow_no_data"), @@ -188,11 +151,11 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Complete_Flow_No_Data(t *testing.T) { PostgresPort: e2e.PostgresPort, Destination: s.bqHelper.Peer, CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, - 
CdcStagingPath: "peerdb_staging", + CdcStagingPath: "", } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(t, err) + s.NoError(err) limits := peerflow.CDCFlowLimits{ TotalSyncFlows: 1, @@ -212,8 +175,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Complete_Flow_No_Data(t *testing.T) { env.AssertExpectations(s.T()) } -func (s *PeerFlowE2ETestSuiteBQ) Test_Char_ColType_Error(t *testing.T) { - t.Parallel() +func (s *PeerFlowE2ETestSuiteBQ) Test_Char_ColType_Error() { env := s.NewTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(env) @@ -227,7 +189,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Char_ColType_Error(t *testing.T) { value CHAR(255) NOT NULL ); `, srcTableName)) - require.NoError(t, err) + s.NoError(err) connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_char_table"), @@ -235,11 +197,11 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Char_ColType_Error(t *testing.T) { PostgresPort: e2e.PostgresPort, Destination: s.bqHelper.Peer, CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, - CdcStagingPath: "peerdb_staging", + CdcStagingPath: "", } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(t, err) + s.NoError(err) limits := peerflow.CDCFlowLimits{ TotalSyncFlows: 1, @@ -262,8 +224,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Char_ColType_Error(t *testing.T) { // Test_Complete_Simple_Flow_BQ tests a complete flow with data in the source table. // The test inserts 10 rows into the source table and verifies that the data is // correctly synced to the destination table after sync flow completes. -func (s *PeerFlowE2ETestSuiteBQ) Test_Complete_Simple_Flow_BQ(t *testing.T) { - t.Parallel() +func (s *PeerFlowE2ETestSuiteBQ) Test_Complete_Simple_Flow_BQ() { env := s.NewTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(env) @@ -277,7 +238,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Complete_Simple_Flow_BQ(t *testing.T) { value TEXT NOT NULL ); `, srcTableName)) - require.NoError(t, err) + s.NoError(err) connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_complete_simple_flow"), @@ -285,14 +246,14 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Complete_Simple_Flow_BQ(t *testing.T) { PostgresPort: e2e.PostgresPort, Destination: s.bqHelper.Peer, CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, - CdcStagingPath: "peerdb_staging", + CdcStagingPath: "", } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(t, err) + s.NoError(err) limits := peerflow.CDCFlowLimits{ - TotalSyncFlows: 2, + TotalSyncFlows: 3, MaxBatchSize: 100, } @@ -307,7 +268,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Complete_Simple_Flow_BQ(t *testing.T) { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(key, value) VALUES ($1, $2) `, srcTableName), testKey, testValue) - require.NoError(t, err) + s.NoError(err) } fmt.Println("Inserted 10 rows into the source table") }() @@ -323,7 +284,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Complete_Simple_Flow_BQ(t *testing.T) { s.Contains(err.Error(), "continue as new") count, err := s.bqHelper.countRows(dstTableName) - require.NoError(t, err) + s.NoError(err) s.Equal(10, count) // TODO: verify that the data is correctly synced to the destination table @@ -332,8 +293,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Complete_Simple_Flow_BQ(t *testing.T) { env.AssertExpectations(s.T()) } -func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_BQ(t *testing.T) { - t.Parallel() +func (s 
*PeerFlowE2ETestSuiteBQ) Test_Toast_BQ() { env := s.NewTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(env) @@ -348,7 +308,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_BQ(t *testing.T) { k int ); `, srcTableName)) - require.NoError(t, err) + s.NoError(err) connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_toast_bq_1"), @@ -356,11 +316,11 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_BQ(t *testing.T) { PostgresPort: e2e.PostgresPort, Destination: s.bqHelper.Peer, CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, - CdcStagingPath: "peerdb_staging", + CdcStagingPath: "", } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(t, err) + s.NoError(err) limits := peerflow.CDCFlowLimits{ TotalSyncFlows: 2, @@ -385,7 +345,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_BQ(t *testing.T) { UPDATE %s SET t1='dummy' WHERE id=2; END; `, srcTableName, srcTableName, srcTableName)) - require.NoError(t, err) + s.NoError(err) fmt.Println("Executed a transaction touching toast columns") }() @@ -403,8 +363,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_BQ(t *testing.T) { env.AssertExpectations(s.T()) } -func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_Nochanges_BQ(t *testing.T) { - t.Parallel() +func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_Nochanges_BQ() { env := s.NewTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(env) @@ -419,7 +378,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_Nochanges_BQ(t *testing.T) { k int ); `, srcTableName)) - require.NoError(t, err) + s.NoError(err) connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_toast_bq_2"), @@ -427,11 +386,11 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_Nochanges_BQ(t *testing.T) { PostgresPort: e2e.PostgresPort, Destination: s.bqHelper.Peer, CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, - CdcStagingPath: "peerdb_staging", + CdcStagingPath: "", } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(t, err) + s.NoError(err) limits := peerflow.CDCFlowLimits{ TotalSyncFlows: 2, @@ -449,7 +408,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_Nochanges_BQ(t *testing.T) { UPDATE %s SET t1='dummy' WHERE id=2; END; `, srcTableName, srcTableName)) - require.NoError(t, err) + s.NoError(err) fmt.Println("Executed a transaction touching toast columns") }() @@ -467,8 +426,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_Nochanges_BQ(t *testing.T) { env.AssertExpectations(s.T()) } -func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_1_BQ(t *testing.T) { - t.Parallel() +func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_1_BQ() { env := s.NewTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(env) @@ -483,7 +441,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_1_BQ(t *testing.T) { k int ); `, srcTableName)) - require.NoError(t, err) + s.NoError(err) connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_toast_bq_3"), @@ -491,11 +449,11 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_1_BQ(t *testing.T) { PostgresPort: e2e.PostgresPort, Destination: s.bqHelper.Peer, CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, - CdcStagingPath: "peerdb_staging", + CdcStagingPath: "", } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(t, err) + s.NoError(err) limits := peerflow.CDCFlowLimits{ TotalSyncFlows: 1, @@ -526,7 +484,7 @@ func (s *PeerFlowE2ETestSuiteBQ) 
Test_Toast_Advance_1_BQ(t *testing.T) { END; `, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName)) - require.NoError(t, err) + s.NoError(err) fmt.Println("Executed a transaction touching toast columns") }() @@ -544,8 +502,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_1_BQ(t *testing.T) { env.AssertExpectations(s.T()) } -func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_2_BQ(t *testing.T) { - t.Parallel() +func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_2_BQ() { env := s.NewTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(env) @@ -559,7 +516,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_2_BQ(t *testing.T) { k int ); `, srcTableName)) - require.NoError(t, err) + s.NoError(err) connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_toast_bq_4"), @@ -567,11 +524,11 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_2_BQ(t *testing.T) { PostgresPort: e2e.PostgresPort, Destination: s.bqHelper.Peer, CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, - CdcStagingPath: "peerdb_staging", + CdcStagingPath: "", } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(t, err) + s.NoError(err) limits := peerflow.CDCFlowLimits{ TotalSyncFlows: 2, @@ -596,7 +553,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_2_BQ(t *testing.T) { UPDATE %s SET k=4 WHERE id=1; END; `, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName)) - require.NoError(t, err) + s.NoError(err) fmt.Println("Executed a transaction touching toast columns") }() @@ -614,8 +571,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_2_BQ(t *testing.T) { env.AssertExpectations(s.T()) } -func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_3_BQ(t *testing.T) { - t.Parallel() +func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_3_BQ() { env := s.NewTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(env) @@ -630,7 +586,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_3_BQ(t *testing.T) { k int ); `, srcTableName)) - require.NoError(t, err) + s.NoError(err) connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_toast_bq_5"), @@ -638,11 +594,11 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_3_BQ(t *testing.T) { PostgresPort: e2e.PostgresPort, Destination: s.bqHelper.Peer, CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, - CdcStagingPath: "peerdb_staging", + CdcStagingPath: "", } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(t, err) + s.NoError(err) limits := peerflow.CDCFlowLimits{ TotalSyncFlows: 2, @@ -666,7 +622,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_3_BQ(t *testing.T) { UPDATE %s SET t2='dummy' WHERE id=1; END; `, srcTableName, srcTableName, srcTableName, srcTableName)) - require.NoError(t, err) + s.NoError(err) fmt.Println("Executed a transaction touching toast columns") }() @@ -684,8 +640,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_3_BQ(t *testing.T) { env.AssertExpectations(s.T()) } -func (s *PeerFlowE2ETestSuiteBQ) Test_Types_BQ(t *testing.T) { - t.Parallel() +func (s *PeerFlowE2ETestSuiteBQ) Test_Types_BQ() { env := s.NewTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(env) @@ -700,7 +655,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Types_BQ(t *testing.T) { c33 TIMESTAMP,c34 TIMESTAMPTZ,c35 TIME, c36 TIMETZ,c37 
TSQUERY,c38 TSVECTOR, c39 TXID_SNAPSHOT,c40 UUID,c41 XML, c42 INT[], c43 FLOAT[], c44 TEXT[]); `, srcTableName)) - require.NoError(t, err) + s.NoError(err) connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_types_bq"), @@ -708,11 +663,11 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Types_BQ(t *testing.T) { PostgresPort: e2e.PostgresPort, Destination: s.bqHelper.Peer, CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, - CdcStagingPath: "peerdb_staging", + CdcStagingPath: "", } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(t, err) + s.NoError(err) limits := peerflow.CDCFlowLimits{ @@ -739,7 +694,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Types_BQ(t *testing.T) { ARRAY[0.0003, 8902.0092], ARRAY['hello','bye']; `, srcTableName)) - require.NoError(t, err) + s.NoError(err) }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) @@ -765,8 +720,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Types_BQ(t *testing.T) { env.AssertExpectations(s.T()) } -func (s *PeerFlowE2ETestSuiteBQ) Test_Multi_Table_BQ(t *testing.T) { - t.Parallel() +func (s *PeerFlowE2ETestSuiteBQ) Test_Multi_Table_BQ() { env := s.NewTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(env) @@ -779,7 +733,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Multi_Table_BQ(t *testing.T) { CREATE TABLE %s (id serial primary key, c1 int, c2 text); CREATE TABLE %s(id serial primary key, c1 int, c2 text); `, srcTable1Name, srcTable2Name)) - require.NoError(t, err) + s.NoError(err) connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_multi_table_bq"), @@ -787,11 +741,11 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Multi_Table_BQ(t *testing.T) { PostgresPort: e2e.PostgresPort, Destination: s.bqHelper.Peer, CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, - CdcStagingPath: "peerdb_staging", + CdcStagingPath: "", } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(t, err) + s.NoError(err) limits := peerflow.CDCFlowLimits{ TotalSyncFlows: 2, @@ -807,7 +761,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Multi_Table_BQ(t *testing.T) { INSERT INTO %s (c1,c2) VALUES (1,'dummy_1'); INSERT INTO %s (c1,c2) VALUES (-1,'dummy_-1'); `, srcTable1Name, srcTable2Name)) - require.NoError(t, err) + s.NoError(err) fmt.Println("Executed an insert on two tables") }() @@ -818,9 +772,9 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Multi_Table_BQ(t *testing.T) { err = env.GetWorkflowError() count1, err := s.bqHelper.countRows(dstTable1Name) - require.NoError(t, err) + s.NoError(err) count2, err := s.bqHelper.countRows(dstTable2Name) - require.NoError(t, err) + s.NoError(err) s.Equal(1, count1) s.Equal(1, count2) @@ -829,8 +783,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Multi_Table_BQ(t *testing.T) { } // TODO: not checking schema exactly, add later -func (s *PeerFlowE2ETestSuiteBQ) Test_Simple_Schema_Changes_BQ(t *testing.T) { - t.Parallel() +func (s *PeerFlowE2ETestSuiteBQ) Test_Simple_Schema_Changes_BQ() { env := s.NewTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(env) @@ -843,7 +796,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Simple_Schema_Changes_BQ(t *testing.T) { c1 BIGINT ); `, srcTableName)) - require.NoError(t, err) + s.NoError(err) connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_simple_schema_changes"), @@ -851,11 +804,11 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Simple_Schema_Changes_BQ(t *testing.T) { PostgresPort: 
e2e.PostgresPort, Destination: s.bqHelper.Peer, CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, - CdcStagingPath: "peerdb_staging", + CdcStagingPath: "", } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(t, err) + s.NoError(err) limits := peerflow.CDCFlowLimits{ TotalSyncFlows: 10, @@ -869,7 +822,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Simple_Schema_Changes_BQ(t *testing.T) { e2e.SetupCDCFlowStatusQuery(env, connectionGen) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1) VALUES ($1)`, srcTableName), 1) - require.NoError(t, err) + s.NoError(err) fmt.Println("Inserted initial row in the source table") // verify we got our first row. @@ -879,11 +832,11 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Simple_Schema_Changes_BQ(t *testing.T) { // alter source table, add column c2 and insert another row. _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` ALTER TABLE %s ADD COLUMN c2 BIGINT`, srcTableName)) - require.NoError(t, err) + s.NoError(err) fmt.Println("Altered source table, added column c2") _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2) VALUES ($1,$2)`, srcTableName), 2, 2) - require.NoError(t, err) + s.NoError(err) fmt.Println("Inserted row with added c2 in the source table") // verify we got our two rows, if schema did not match up it will error. @@ -893,11 +846,11 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Simple_Schema_Changes_BQ(t *testing.T) { // alter source table, add column c3, drop column c2 and insert another row. _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` ALTER TABLE %s DROP COLUMN c2, ADD COLUMN c3 BIGINT`, srcTableName)) - require.NoError(t, err) + s.NoError(err) fmt.Println("Altered source table, dropped column c2 and added column c3") _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c3) VALUES ($1,$2)`, srcTableName), 3, 3) - require.NoError(t, err) + s.NoError(err) fmt.Println("Inserted row with added c3 in the source table") // verify we got our two rows, if schema did not match up it will error. @@ -907,11 +860,11 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Simple_Schema_Changes_BQ(t *testing.T) { // alter source table, drop column c3 and insert another row. _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` ALTER TABLE %s DROP COLUMN c3`, srcTableName)) - require.NoError(t, err) + s.NoError(err) fmt.Println("Altered source table, dropped column c3") _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1) VALUES ($1)`, srcTableName), 4) - require.NoError(t, err) + s.NoError(err) fmt.Println("Inserted row after dropping all columns in the source table") // verify we got our two rows, if schema did not match up it will error. 
@@ -932,8 +885,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Simple_Schema_Changes_BQ(t *testing.T) { env.AssertExpectations(s.T()) } -func (s *PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_BQ(t *testing.T) { - t.Parallel() +func (s *PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_BQ() { env := s.NewTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(env) @@ -949,7 +901,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_BQ(t *testing.T) { PRIMARY KEY(id,t) ); `, srcTableName)) - require.NoError(t, err) + s.NoError(err) connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_cpkey_flow"), @@ -957,11 +909,11 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_BQ(t *testing.T) { PostgresPort: e2e.PostgresPort, Destination: s.bqHelper.Peer, CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, - CdcStagingPath: "peerdb_staging", + CdcStagingPath: "", } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(t, err) + s.NoError(err) limits := peerflow.CDCFlowLimits{ TotalSyncFlows: 2, @@ -978,7 +930,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_BQ(t *testing.T) { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c2,t) VALUES ($1,$2) `, srcTableName), i, testValue) - require.NoError(t, err) + s.NoError(err) } fmt.Println("Inserted 10 rows into the source table") @@ -988,9 +940,9 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_BQ(t *testing.T) { _, err := s.pool.Exec(context.Background(), fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) - require.NoError(t, err) + s.NoError(err) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) - require.NoError(t, err) + s.NoError(err) }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) @@ -1008,8 +960,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_BQ(t *testing.T) { env.AssertExpectations(s.T()) } -func (s *PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_Toast_1_BQ(t *testing.T) { - t.Parallel() +func (s *PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_Toast_1_BQ() { env := s.NewTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(env) @@ -1026,7 +977,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_Toast_1_BQ(t *testing.T) { PRIMARY KEY(id,t) ); `, srcTableName)) - require.NoError(t, err) + s.NoError(err) connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_cpkey_toast1_flow"), @@ -1034,11 +985,11 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_Toast_1_BQ(t *testing.T) { PostgresPort: e2e.PostgresPort, Destination: s.bqHelper.Peer, CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, - CdcStagingPath: "peerdb_staging", + CdcStagingPath: "", } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(t, err) + s.NoError(err) limits := peerflow.CDCFlowLimits{ TotalSyncFlows: 2, @@ -1050,7 +1001,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_Toast_1_BQ(t *testing.T) { go func() { e2e.SetupCDCFlowStatusQuery(env, connectionGen) rowsTx, err := s.pool.Begin(context.Background()) - require.NoError(t, err) + s.NoError(err) // insert 10 rows into the source table for i := 0; i < 10; i++ { @@ -1058,18 +1009,18 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_Toast_1_BQ(t *testing.T) { _, err = rowsTx.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c2,t,t2) VALUES ($1,$2,random_string(9000)) `, 
srcTableName), i, testValue) - require.NoError(t, err) + s.NoError(err) } fmt.Println("Inserted 10 rows into the source table") _, err = rowsTx.Exec(context.Background(), fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) - require.NoError(t, err) + s.NoError(err) _, err = rowsTx.Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) - require.NoError(t, err) + s.NoError(err) err = rowsTx.Commit(context.Background()) - require.NoError(t, err) + s.NoError(err) }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) @@ -1088,8 +1039,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_Toast_1_BQ(t *testing.T) { env.AssertExpectations(s.T()) } -func (s *PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_Toast_2_BQ(t *testing.T) { - t.Parallel() +func (s *PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_Toast_2_BQ() { env := s.NewTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(env) @@ -1106,7 +1056,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_Toast_2_BQ(t *testing.T) { PRIMARY KEY(id,t) ); `, srcTableName)) - require.NoError(t, err) + s.NoError(err) connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_cpkey_toast2_flow"), @@ -1114,11 +1064,11 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_Toast_2_BQ(t *testing.T) { PostgresPort: e2e.PostgresPort, Destination: s.bqHelper.Peer, CDCSyncMode: protos.QRepSyncMode_QREP_SYNC_MODE_STORAGE_AVRO, - CdcStagingPath: "peerdb_staging", + CdcStagingPath: "", } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(t, err) + s.NoError(err) limits := peerflow.CDCFlowLimits{ TotalSyncFlows: 2, @@ -1136,16 +1086,16 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_Toast_2_BQ(t *testing.T) { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c2,t,t2) VALUES ($1,$2,random_string(9000)) `, srcTableName), i, testValue) - require.NoError(t, err) + s.NoError(err) } fmt.Println("Inserted 10 rows into the source table") e2e.NormalizeFlowCountQuery(env, connectionGen, 2) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) - require.NoError(t, err) + s.NoError(err) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) - require.NoError(t, err) + s.NoError(err) }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) diff --git a/flow/e2e/congen.go b/flow/e2e/congen.go index 3a0f71b15a..fe1c6c029a 100644 --- a/flow/e2e/congen.go +++ b/flow/e2e/congen.go @@ -94,21 +94,6 @@ func SetupPostgres(suffix string) (*pgxpool.Pool, error) { return nil, fmt.Errorf("failed to create e2e_test schema: %w", err) } - _, err = pool.Exec(context.Background(), ` - SELECT pg_advisory_lock(hashtext('peerdb_pg_setup_lock')); - CREATE OR REPLACE FUNCTION random_string( int ) RETURNS TEXT as $$ - SELECT string_agg(substring('0123456789bcdfghjkmnpqrstvwxyz', - round(random() * 30)::integer, 1), '') FROM generate_series(1, $1); - $$ language sql; - CREATE OR REPLACE FUNCTION random_bytea(bytea_length integer) - RETURNS bytea AS $body$ - SELECT decode(string_agg(lpad(to_hex(width_bucket(random(), 0, 1, 256)-1),2,'0') ,''), 'hex') - FROM generate_series(1, $1); - $body$ - LANGUAGE 'sql' - VOLATILE - SET search_path = 'pg_catalog'; - `) if err != nil { return nil, fmt.Errorf("failed to create utility functions: %w", err) } diff --git 
a/flow/e2e/snowflake/peer_flow_sf_test.go b/flow/e2e/snowflake/peer_flow_sf_test.go index 49aee77b2b..37848f0383 100644 --- a/flow/e2e/snowflake/peer_flow_sf_test.go +++ b/flow/e2e/snowflake/peer_flow_sf_test.go @@ -32,44 +32,7 @@ type PeerFlowE2ETestSuiteSF struct { } func TestPeerFlowE2ETestSuiteSF(t *testing.T) { - s := &PeerFlowE2ETestSuiteSF{} - s.SetT(t) - s.SetupSuite() - - tests := []struct { - name string - test func(t *testing.T) - }{ - {"Test_Complete_Simple_Flow_SF", s.Test_Complete_Simple_Flow_SF}, - {"Test_Invalid_Geo_SF_Avro_CDC", s.Test_Invalid_Geo_SF_Avro_CDC}, - {"Test_Toast_SF", s.Test_Toast_SF}, - {"Test_Toast_Nochanges_SF", s.Test_Toast_Nochanges_SF}, - {"Test_Toast_Advance_1_SF", s.Test_Toast_Advance_1_SF}, - {"Test_Toast_Advance_2_SF", s.Test_Toast_Advance_2_SF}, - {"Test_Toast_Advance_3_SF", s.Test_Toast_Advance_3_SF}, - {"Test_Types_SF", s.Test_Types_SF}, - {"Test_Multi_Table_SF", s.Test_Multi_Table_SF}, - {"Test_Simple_Schema_Changes_SF", s.Test_Simple_Schema_Changes_SF}, - {"Test_Composite_PKey_SF", s.Test_Composite_PKey_SF}, - {"Test_Composite_PKey_Toast_1_SF", s.Test_Composite_PKey_Toast_1_SF}, - {"Test_Composite_PKey_Toast_2_SF", s.Test_Composite_PKey_Toast_2_SF}, - {"Test_Column_Exclusion", s.Test_Column_Exclusion}, - } - - // assert that there are no duplicate test names - testNames := make(map[string]bool) - for _, tt := range tests { - if testNames[tt.name] { - t.Fatalf("duplicate test name: %s", tt.name) - } - testNames[tt.name] = true - - t.Run(tt.name, tt.test) - } - - t.Cleanup(func() { - s.TearDownSuite() - }) + suite.Run(t, new(PeerFlowE2ETestSuiteSF)) } func (s *PeerFlowE2ETestSuiteSF) attachSchemaSuffix(tableName string) string { @@ -151,8 +114,7 @@ func (s *PeerFlowE2ETestSuiteSF) TearDownSuite() { require.NoError(s.T(), err) } -func (s *PeerFlowE2ETestSuiteSF) Test_Complete_Simple_Flow_SF(t *testing.T) { - t.Parallel() +func (s *PeerFlowE2ETestSuiteSF) Test_Complete_Simple_Flow_SF() { env := s.NewTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(env) @@ -166,7 +128,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Complete_Simple_Flow_SF(t *testing.T) { value TEXT NOT NULL ); `, srcTableName)) - require.NoError(t, err) + s.NoError(err) connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_simple_flow"), @@ -177,7 +139,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Complete_Simple_Flow_SF(t *testing.T) { } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(t, err) + s.NoError(err) limits := peerflow.CDCFlowLimits{ TotalSyncFlows: 2, @@ -195,7 +157,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Complete_Simple_Flow_SF(t *testing.T) { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s (key, value) VALUES ($1, $2) `, srcTableName), testKey, testValue) - require.NoError(t, err) + s.NoError(err) } fmt.Println("Inserted 10 rows into the source table") }() @@ -211,7 +173,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Complete_Simple_Flow_SF(t *testing.T) { s.Contains(err.Error(), "continue as new") count, err := s.sfHelper.CountRows("test_simple_flow_sf") - require.NoError(t, err) + s.NoError(err) s.Equal(20, count) // check the number of rows where _PEERDB_SYNCED_AT is newer than 5 mins ago @@ -220,7 +182,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Complete_Simple_Flow_SF(t *testing.T) { SELECT COUNT(*) FROM %s WHERE _PEERDB_SYNCED_AT > CURRENT_TIMESTAMP() - INTERVAL '30 MINUTE' `, dstTableName) numNewRows, err := s.sfHelper.RunIntQuery(newerSyncedAtQuery) - 
require.NoError(t, err) + s.NoError(err) s.Equal(20, numNewRows) // TODO: verify that the data is correctly synced to the destination table @@ -229,8 +191,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Complete_Simple_Flow_SF(t *testing.T) { env.AssertExpectations(s.T()) } -func (s *PeerFlowE2ETestSuiteSF) Test_Invalid_Geo_SF_Avro_CDC(t *testing.T) { - t.Parallel() +func (s *PeerFlowE2ETestSuiteSF) Test_Invalid_Geo_SF_Avro_CDC() { env := s.NewTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(env) @@ -244,7 +205,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Invalid_Geo_SF_Avro_CDC(t *testing.T) { poly GEOGRAPHY(POLYGON) NOT NULL ); `, srcTableName)) - require.NoError(t, err) + s.NoError(err) connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_invalid_geo_sf_avro_cdc"), @@ -255,7 +216,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Invalid_Geo_SF_Avro_CDC(t *testing.T) { } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(t, err) + s.NoError(err) limits := peerflow.CDCFlowLimits{ TotalSyncFlows: 2, @@ -276,7 +237,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Invalid_Geo_SF_Avro_CDC(t *testing.T) { "5fc64140f2567052abc2c9bf2df9c5925fc641409394e16573c2c9bf2df9c5925fc6414049eceda9afc1c9bfdd1cc1a05fc64140fe43faedebc0"+ "c9bf4694f6065fc64140fe43faedebc0c9bfffe7305f5ec641406693d6f2ddc0c9bf1a8361d35dc64140afdb8d2b1bc3c9bf", ) - require.NoError(t, err) + s.NoError(err) } fmt.Println("Inserted 4 invalid geography rows into the source table") for i := 4; i < 10; i++ { @@ -286,7 +247,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Invalid_Geo_SF_Avro_CDC(t *testing.T) { "010300000001000000050000000000000000000000000000000000000000000000"+ "00000000000000000000f03f000000000000f03f000000000000f03f0000000000"+ "00f03f000000000000000000000000000000000000000000000000") - require.NoError(t, err) + s.NoError(err) } fmt.Println("Inserted 6 valid geography rows and 10 total rows into source") }() @@ -304,11 +265,11 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Invalid_Geo_SF_Avro_CDC(t *testing.T) { // We inserted 4 invalid shapes in each. 
// They should have filtered out as null on destination lineCount, err := s.sfHelper.CountNonNullRows("test_invalid_geo_sf_avro_cdc", "line") - require.NoError(t, err) + s.NoError(err) s.Equal(6, lineCount) polyCount, err := s.sfHelper.CountNonNullRows("test_invalid_geo_sf_avro_cdc", "poly") - require.NoError(t, err) + s.NoError(err) s.Equal(6, polyCount) // TODO: verify that the data is correctly synced to the destination table @@ -317,8 +278,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Invalid_Geo_SF_Avro_CDC(t *testing.T) { env.AssertExpectations(s.T()) } -func (s *PeerFlowE2ETestSuiteSF) Test_Toast_SF(t *testing.T) { - t.Parallel() +func (s *PeerFlowE2ETestSuiteSF) Test_Toast_SF() { env := s.NewTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(env) @@ -333,7 +293,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Toast_SF(t *testing.T) { k int ); `, srcTableName)) - require.NoError(t, err) + s.NoError(err) connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_toast_sf_1"), @@ -344,7 +304,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Toast_SF(t *testing.T) { } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(t, err) + s.NoError(err) limits := peerflow.CDCFlowLimits{ TotalSyncFlows: 2, @@ -369,7 +329,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Toast_SF(t *testing.T) { UPDATE %s SET t1='dummy' WHERE id=2; END; `, srcTableName, srcTableName, srcTableName)) - require.NoError(t, err) + s.NoError(err) fmt.Println("Executed a transaction touching toast columns") }() @@ -387,8 +347,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Toast_SF(t *testing.T) { env.AssertExpectations(s.T()) } -func (s *PeerFlowE2ETestSuiteSF) Test_Toast_Nochanges_SF(t *testing.T) { - t.Parallel() +func (s *PeerFlowE2ETestSuiteSF) Test_Toast_Nochanges_SF() { env := s.NewTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(env) @@ -405,7 +364,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Toast_Nochanges_SF(t *testing.T) { ); `, srcTableName, srcTableName)) log.Infof("Creating table '%s', err: %v", srcTableName, err) - require.NoError(t, err) + s.NoError(err) connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_toast_sf_2"), @@ -416,7 +375,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Toast_Nochanges_SF(t *testing.T) { } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(t, err) + s.NoError(err) limits := peerflow.CDCFlowLimits{ TotalSyncFlows: 2, @@ -434,7 +393,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Toast_Nochanges_SF(t *testing.T) { UPDATE %s SET t1='dummy' WHERE id=2; END; `, srcTableName, srcTableName)) - require.NoError(t, err) + s.NoError(err) fmt.Println("Executed a transaction touching toast columns") }() @@ -452,8 +411,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Toast_Nochanges_SF(t *testing.T) { env.AssertExpectations(s.T()) } -func (s *PeerFlowE2ETestSuiteSF) Test_Toast_Advance_1_SF(t *testing.T) { - t.Parallel() +func (s *PeerFlowE2ETestSuiteSF) Test_Toast_Advance_1_SF() { env := s.NewTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(env) @@ -469,7 +427,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Toast_Advance_1_SF(t *testing.T) { k int ); `, srcTableName, srcTableName)) - require.NoError(t, err) + s.NoError(err) connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_toast_sf_3"), @@ -480,7 +438,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Toast_Advance_1_SF(t *testing.T) { } flowConnConfig, err := 
connectionGen.GenerateFlowConnectionConfigs() - require.NoError(t, err) + s.NoError(err) limits := peerflow.CDCFlowLimits{ TotalSyncFlows: 2, @@ -511,7 +469,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Toast_Advance_1_SF(t *testing.T) { END; `, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName)) - require.NoError(t, err) + s.NoError(err) fmt.Println("Executed a transaction touching toast columns") }() @@ -529,8 +487,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Toast_Advance_1_SF(t *testing.T) { env.AssertExpectations(s.T()) } -func (s *PeerFlowE2ETestSuiteSF) Test_Toast_Advance_2_SF(t *testing.T) { - t.Parallel() +func (s *PeerFlowE2ETestSuiteSF) Test_Toast_Advance_2_SF() { env := s.NewTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(env) @@ -545,7 +502,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Toast_Advance_2_SF(t *testing.T) { k int ); `, srcTableName, srcTableName)) - require.NoError(t, err) + s.NoError(err) connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_toast_sf_4"), @@ -556,7 +513,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Toast_Advance_2_SF(t *testing.T) { } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(t, err) + s.NoError(err) limits := peerflow.CDCFlowLimits{ TotalSyncFlows: 2, @@ -581,7 +538,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Toast_Advance_2_SF(t *testing.T) { UPDATE %s SET k=4 WHERE id=1; END; `, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName, srcTableName)) - require.NoError(t, err) + s.NoError(err) fmt.Println("Executed a transaction touching toast columns") }() @@ -599,8 +556,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Toast_Advance_2_SF(t *testing.T) { env.AssertExpectations(s.T()) } -func (s *PeerFlowE2ETestSuiteSF) Test_Toast_Advance_3_SF(t *testing.T) { - t.Parallel() +func (s *PeerFlowE2ETestSuiteSF) Test_Toast_Advance_3_SF() { env := s.NewTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(env) @@ -616,7 +572,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Toast_Advance_3_SF(t *testing.T) { k int ); `, srcTableName, srcTableName)) - require.NoError(t, err) + s.NoError(err) connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_toast_sf_5"), @@ -627,7 +583,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Toast_Advance_3_SF(t *testing.T) { } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(t, err) + s.NoError(err) limits := peerflow.CDCFlowLimits{ TotalSyncFlows: 2, @@ -651,7 +607,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Toast_Advance_3_SF(t *testing.T) { UPDATE %s SET t2='dummy' WHERE id=1; END; `, srcTableName, srcTableName, srcTableName, srcTableName)) - require.NoError(t, err) + s.NoError(err) fmt.Println("Executed a transaction touching toast columns") }() @@ -669,8 +625,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Toast_Advance_3_SF(t *testing.T) { env.AssertExpectations(s.T()) } -func (s *PeerFlowE2ETestSuiteSF) Test_Types_SF(t *testing.T) { - t.Parallel() +func (s *PeerFlowE2ETestSuiteSF) Test_Types_SF() { env := s.NewTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(env) @@ -687,7 +642,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Types_SF(t *testing.T) { c39 TXID_SNAPSHOT,c40 UUID,c41 XML, c42 GEOMETRY(POINT), c43 GEOGRAPHY(POINT), c44 GEOGRAPHY(POLYGON), c45 GEOGRAPHY(LINESTRING), c46 GEOMETRY(LINESTRING), c47 GEOMETRY(POLYGON)); `, srcTableName, 
srcTableName)) - require.NoError(t, err) + s.NoError(err) connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_types_sf"), @@ -698,7 +653,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Types_SF(t *testing.T) { } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(t, err) + s.NoError(err) limits := peerflow.CDCFlowLimits{ TotalSyncFlows: 2, @@ -724,7 +679,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Types_SF(t *testing.T) { 'LINESTRING(-74.0060 40.7128, -73.9352 40.7306, -73.9123 40.7831)','LINESTRING(0 0, 1 1, 2 2)', 'POLYGON((-74.0060 40.7128, -73.9352 40.7306, -73.9123 40.7831, -74.0060 40.7128))'; `, srcTableName)) - require.NoError(t, err) + s.NoError(err) }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) @@ -750,8 +705,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Types_SF(t *testing.T) { env.AssertExpectations(s.T()) } -func (s *PeerFlowE2ETestSuiteSF) Test_Multi_Table_SF(t *testing.T) { - t.Parallel() +func (s *PeerFlowE2ETestSuiteSF) Test_Multi_Table_SF() { env := s.NewTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(env) @@ -764,7 +718,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Multi_Table_SF(t *testing.T) { CREATE TABLE IF NOT EXISTS %s (id serial primary key, c1 int, c2 text); CREATE TABLE IF NOT EXISTS %s (id serial primary key, c1 int, c2 text); `, srcTable1Name, srcTable2Name)) - require.NoError(t, err) + s.NoError(err) connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_multi_table"), @@ -775,7 +729,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Multi_Table_SF(t *testing.T) { } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(t, err) + s.NoError(err) limits := peerflow.CDCFlowLimits{ TotalSyncFlows: 2, @@ -791,7 +745,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Multi_Table_SF(t *testing.T) { INSERT INTO %s (c1,c2) VALUES (1,'dummy_1'); INSERT INTO %s (c1,c2) VALUES (-1,'dummy_-1'); `, srcTable1Name, srcTable2Name)) - require.NoError(t, err) + s.NoError(err) }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) @@ -801,9 +755,9 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Multi_Table_SF(t *testing.T) { err = env.GetWorkflowError() count1, err := s.sfHelper.CountRows("test1_sf") - require.NoError(t, err) + s.NoError(err) count2, err := s.sfHelper.CountRows("test2_sf") - require.NoError(t, err) + s.NoError(err) s.Equal(1, count1) s.Equal(1, count2) @@ -811,8 +765,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Multi_Table_SF(t *testing.T) { env.AssertExpectations(s.T()) } -func (s *PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF(t *testing.T) { - t.Parallel() +func (s *PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF() { env := s.NewTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(env) @@ -825,7 +778,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF(t *testing.T) { c1 BIGINT ); `, srcTableName)) - require.NoError(t, err) + s.NoError(err) connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_simple_schema_changes"), @@ -836,7 +789,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF(t *testing.T) { } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(t, err) + s.NoError(err) limits := peerflow.CDCFlowLimits{ TotalSyncFlows: 10, @@ -850,7 +803,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF(t *testing.T) { 
e2e.SetupCDCFlowStatusQuery(env, connectionGen) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1) VALUES ($1)`, srcTableName), 1) - require.NoError(t, err) + s.NoError(err) fmt.Println("Inserted initial row in the source table") // verify we got our first row. @@ -867,18 +820,18 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF(t *testing.T) { output, err := s.connector.GetTableSchema(&protos.GetTableSchemaBatchInput{ TableIdentifiers: []string{dstTableName}, }) - require.NoError(t, err) + s.NoError(err) s.Equal(expectedTableSchema, output.TableNameSchemaMapping[dstTableName]) s.compareTableContentsSF("test_simple_schema_changes", "id,c1", false) // alter source table, add column c2 and insert another row. _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` ALTER TABLE %s ADD COLUMN c2 BIGINT`, srcTableName)) - require.NoError(t, err) + s.NoError(err) fmt.Println("Altered source table, added column c2") _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c2) VALUES ($1,$2)`, srcTableName), 2, 2) - require.NoError(t, err) + s.NoError(err) fmt.Println("Inserted row with added c2 in the source table") // verify we got our two rows, if schema did not match up it will error. @@ -896,18 +849,18 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF(t *testing.T) { output, err = s.connector.GetTableSchema(&protos.GetTableSchemaBatchInput{ TableIdentifiers: []string{dstTableName}, }) - require.NoError(t, err) + s.NoError(err) s.Equal(expectedTableSchema, output.TableNameSchemaMapping[dstTableName]) s.compareTableContentsSF("test_simple_schema_changes", "id,c1,c2", false) // alter source table, add column c3, drop column c2 and insert another row. _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` ALTER TABLE %s DROP COLUMN c2, ADD COLUMN c3 BIGINT`, srcTableName)) - require.NoError(t, err) + s.NoError(err) fmt.Println("Altered source table, dropped column c2 and added column c3") _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1,c3) VALUES ($1,$2)`, srcTableName), 3, 3) - require.NoError(t, err) + s.NoError(err) fmt.Println("Inserted row with added c3 in the source table") // verify we got our two rows, if schema did not match up it will error. @@ -926,18 +879,18 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF(t *testing.T) { output, err = s.connector.GetTableSchema(&protos.GetTableSchemaBatchInput{ TableIdentifiers: []string{dstTableName}, }) - require.NoError(t, err) + s.NoError(err) s.Equal(expectedTableSchema, output.TableNameSchemaMapping[dstTableName]) s.compareTableContentsSF("test_simple_schema_changes", "id,c1,c3", false) // alter source table, drop column c3 and insert another row. _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` ALTER TABLE %s DROP COLUMN c3`, srcTableName)) - require.NoError(t, err) + s.NoError(err) fmt.Println("Altered source table, dropped column c3") _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c1) VALUES ($1)`, srcTableName), 4) - require.NoError(t, err) + s.NoError(err) fmt.Println("Inserted row after dropping all columns in the source table") // verify we got our two rows, if schema did not match up it will error. 
@@ -956,7 +909,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF(t *testing.T) { output, err = s.connector.GetTableSchema(&protos.GetTableSchemaBatchInput{ TableIdentifiers: []string{dstTableName}, }) - require.NoError(t, err) + s.NoError(err) s.Equal(expectedTableSchema, output.TableNameSchemaMapping[dstTableName]) s.compareTableContentsSF("test_simple_schema_changes", "id,c1", false) }() @@ -974,8 +927,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Simple_Schema_Changes_SF(t *testing.T) { env.AssertExpectations(s.T()) } -func (s *PeerFlowE2ETestSuiteSF) Test_Composite_PKey_SF(t *testing.T) { - t.Parallel() +func (s *PeerFlowE2ETestSuiteSF) Test_Composite_PKey_SF() { env := s.NewTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(env) @@ -991,7 +943,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Composite_PKey_SF(t *testing.T) { PRIMARY KEY(id,t) ); `, srcTableName)) - require.NoError(t, err) + s.NoError(err) connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_cpkey_flow"), @@ -1002,7 +954,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Composite_PKey_SF(t *testing.T) { } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(t, err) + s.NoError(err) limits := peerflow.CDCFlowLimits{ TotalSyncFlows: 5, @@ -1019,7 +971,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Composite_PKey_SF(t *testing.T) { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c2,t) VALUES ($1,$2) `, srcTableName), i, testValue) - require.NoError(t, err) + s.NoError(err) } fmt.Println("Inserted 10 rows into the source table") @@ -1029,9 +981,9 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Composite_PKey_SF(t *testing.T) { _, err := s.pool.Exec(context.Background(), fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) - require.NoError(t, err) + s.NoError(err) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) - require.NoError(t, err) + s.NoError(err) }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) @@ -1050,8 +1002,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Composite_PKey_SF(t *testing.T) { env.AssertExpectations(s.T()) } -func (s *PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_1_SF(t *testing.T) { - t.Parallel() +func (s *PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_1_SF() { env := s.NewTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(env) @@ -1068,7 +1019,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_1_SF(t *testing.T) { PRIMARY KEY(id,t) ); `, srcTableName)) - require.NoError(t, err) + s.NoError(err) connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_cpkey_toast1_flow"), @@ -1079,7 +1030,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_1_SF(t *testing.T) { } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(t, err) + s.NoError(err) limits := peerflow.CDCFlowLimits{ TotalSyncFlows: 2, @@ -1091,7 +1042,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_1_SF(t *testing.T) { go func() { e2e.SetupCDCFlowStatusQuery(env, connectionGen) rowsTx, err := s.pool.Begin(context.Background()) - require.NoError(t, err) + s.NoError(err) // insert 10 rows into the source table for i := 0; i < 10; i++ { @@ -1099,18 +1050,18 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_1_SF(t *testing.T) { _, err = rowsTx.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c2,t,t2) 
VALUES ($1,$2,random_string(9000)) `, srcTableName), i, testValue) - require.NoError(t, err) + s.NoError(err) } fmt.Println("Inserted 10 rows into the source table") _, err = rowsTx.Exec(context.Background(), fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) - require.NoError(t, err) + s.NoError(err) _, err = rowsTx.Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) - require.NoError(t, err) + s.NoError(err) err = rowsTx.Commit(context.Background()) - require.NoError(t, err) + s.NoError(err) }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) @@ -1129,8 +1080,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_1_SF(t *testing.T) { env.AssertExpectations(s.T()) } -func (s *PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_2_SF(t *testing.T) { - t.Parallel() +func (s *PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_2_SF() { env := s.NewTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(env) @@ -1147,7 +1097,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_2_SF(t *testing.T) { PRIMARY KEY(id,t) ); `, srcTableName)) - require.NoError(t, err) + s.NoError(err) connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_cpkey_toast2_flow"), @@ -1158,7 +1108,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_2_SF(t *testing.T) { } flowConnConfig, err := connectionGen.GenerateFlowConnectionConfigs() - require.NoError(t, err) + s.NoError(err) limits := peerflow.CDCFlowLimits{ TotalSyncFlows: 4, @@ -1176,16 +1126,16 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_2_SF(t *testing.T) { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c2,t,t2) VALUES ($1,$2,random_string(9000)) `, srcTableName), i, testValue) - require.NoError(t, err) + s.NoError(err) } fmt.Println("Inserted 10 rows into the source table") e2e.NormalizeFlowCountQuery(env, connectionGen, 2) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) - require.NoError(t, err) + s.NoError(err) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0) - require.NoError(t, err) + s.NoError(err) }() env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, flowConnConfig, &limits, nil) @@ -1204,8 +1154,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_2_SF(t *testing.T) { env.AssertExpectations(s.T()) } -func (s *PeerFlowE2ETestSuiteSF) Test_Column_Exclusion(t *testing.T) { - t.Parallel() +func (s *PeerFlowE2ETestSuiteSF) Test_Column_Exclusion() { env := s.NewTestWorkflowEnvironment() e2e.RegisterWorkflowsAndActivities(env) @@ -1222,7 +1171,7 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Column_Exclusion(t *testing.T) { PRIMARY KEY(id,t) ); `, srcTableName)) - require.NoError(t, err) + s.NoError(err) connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_exclude_flow"), @@ -1259,20 +1208,20 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Column_Exclusion(t *testing.T) { _, err = s.pool.Exec(context.Background(), fmt.Sprintf(` INSERT INTO %s(c2,t,t2) VALUES ($1,$2,random_string(100)) `, srcTableName), i, testValue) - require.NoError(t, err) + s.NoError(err) } fmt.Println("Inserted 10 rows into the source table") e2e.NormalizeFlowCountQuery(env, connectionGen, 2) _, err = s.pool.Exec(context.Background(), fmt.Sprintf(`UPDATE %s SET c1=c1+1 WHERE MOD(c2,2)=$1`, srcTableName), 1) - 
require.NoError(t, err)
+		s.NoError(err)
 		_, err = s.pool.Exec(context.Background(),
 			fmt.Sprintf(`DELETE FROM %s WHERE MOD(c2,2)=$1`, srcTableName), 0)
-		require.NoError(t, err)
+		s.NoError(err)
 	}()
 
 	env.ExecuteWorkflow(peerflow.CDCFlowWorkflowWithConfig, config, &limits, nil)
-	require.True(t, env.IsWorkflowCompleted())
+	s.True(env.IsWorkflowCompleted())
 	err = env.GetWorkflowError()
 	s.Error(err)
 	s.Contains(err.Error(), "continue as new")
@@ -1280,11 +1229,11 @@ func (s *PeerFlowE2ETestSuiteSF) Test_Column_Exclusion(t *testing.T) {
 	query := fmt.Sprintf("SELECT * FROM %s.%s.test_exclude_sf ORDER BY id",
 		s.sfHelper.testDatabaseName, s.sfHelper.testSchemaName)
 	sfRows, err := s.sfHelper.ExecuteAndProcessQuery(query)
-	require.NoError(t, err)
+	s.NoError(err)
 
 	for _, field := range sfRows.Schema.Fields {
-		require.NotEqual(t, field.Name, "c2")
+		s.NotEqual(field.Name, "c2")
 	}
-	require.Equal(t, 4, len(sfRows.Schema.Fields))
-	require.Equal(t, 10, len(sfRows.Records))
+	s.Equal(4, len(sfRows.Schema.Fields))
+	s.Equal(10, len(sfRows.Records))
 }
diff --git a/flow/model/qrecord_batch.go b/flow/model/qrecord_batch.go
index 25f4f7b20c..27ebc4014e 100644
--- a/flow/model/qrecord_batch.go
+++ b/flow/model/qrecord_batch.go
@@ -21,31 +21,32 @@ type QRecordBatch struct {
 // Equals checks if two QRecordBatches are identical.
 func (q *QRecordBatch) Equals(other *QRecordBatch) bool {
 	if other == nil {
+		fmt.Printf("other is nil\n")
 		return q == nil
 	}
 
 	// First check simple attributes
 	if q.NumRecords != other.NumRecords {
 		// print num records
-		log.Infof("q.NumRecords: %d\n", q.NumRecords)
-		log.Infof("other.NumRecords: %d\n", other.NumRecords)
+		fmt.Printf("q.NumRecords: %d\n", q.NumRecords)
+		fmt.Printf("other.NumRecords: %d\n", other.NumRecords)
 		return false
 	}
 
 	// Compare column names
 	if !q.Schema.EqualNames(other.Schema) {
-		log.Infof("Column names are not equal")
-		log.Infof("Schema 1: %v", q.Schema.GetColumnNames())
-		log.Infof("Schema 2: %v", other.Schema.GetColumnNames())
+		fmt.Printf("Column names are not equal\n")
+		fmt.Printf("Schema 1: %v\n", q.Schema.GetColumnNames())
+		fmt.Printf("Schema 2: %v\n", other.Schema.GetColumnNames())
 		return false
 	}
 
 	// Compare records
 	for i, record := range q.Records {
 		if !record.equals(other.Records[i]) {
-			log.Infof("Record %d is not equal", i)
-			log.Infof("Record 1: %v", record)
-			log.Infof("Record 2: %v", other.Records[i])
+			fmt.Printf("Record %d is not equal\n", i)
+			fmt.Printf("Record 1: %v\n", record)
+			fmt.Printf("Record 2: %v\n", other.Records[i])
 			return false
 		}
 	}

From e93258cd1b1aeb7a7eb2860062a01d1860b305d2 Mon Sep 17 00:00:00 2001
From: Kevin Biju
Date: Tue, 21 Nov 2023 23:29:01 +0530
Subject: [PATCH 09/10] re-add utility function creation removed by oopsie

---
 flow/e2e/congen.go | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/flow/e2e/congen.go b/flow/e2e/congen.go
index fe1c6c029a..72a756e531 100644
--- a/flow/e2e/congen.go
+++ b/flow/e2e/congen.go
@@ -94,6 +94,20 @@ func SetupPostgres(suffix string) (*pgxpool.Pool, error) {
 		return nil, fmt.Errorf("failed to create e2e_test schema: %w", err)
 	}
 
+	_, err = pool.Exec(context.Background(), `
+	CREATE OR REPLACE FUNCTION random_string( int ) RETURNS TEXT as $$
+	SELECT string_agg(substring('0123456789bcdfghjkmnpqrstvwxyz',
+	round(random() * 30)::integer, 1), '') FROM generate_series(1, $1);
+	$$ language sql;
+	CREATE OR REPLACE FUNCTION random_bytea(bytea_length integer)
+	RETURNS bytea AS $body$
+		SELECT decode(string_agg(lpad(to_hex(width_bucket(random(), 0, 1, 256)-1),2,'0') ,''), 'hex')
+		FROM generate_series(1, $1);
+	$body$
+	LANGUAGE 'sql'
+	VOLATILE
+	SET search_path = 'pg_catalog';
+	`)
 	if err != nil {
 		return nil, fmt.Errorf("failed to create utility functions: %w", err)
 	}

From abb795e16eefe0aee01e1f702a58916aad54e79d Mon Sep 17 00:00:00 2001
From: Kevin Biju
Date: Wed, 22 Nov 2023 13:09:29 +0530
Subject: [PATCH 10/10] fixed CDC status query to check snapshot completion, and BigQuery compression is dead

---
 flow/connectors/bigquery/qrep_avro_sync.go | 4 ++--
 flow/e2e/bigquery/peer_flow_bq_test.go | 2 +-
 flow/e2e/bigquery/qrep_flow_bq_test.go | 6 +++---
 flow/e2e/test_utils.go | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/flow/connectors/bigquery/qrep_avro_sync.go b/flow/connectors/bigquery/qrep_avro_sync.go
index 5b9b99b13d..97c043ac04 100644
--- a/flow/connectors/bigquery/qrep_avro_sync.go
+++ b/flow/connectors/bigquery/qrep_avro_sync.go
@@ -331,7 +331,7 @@ func (s *QRepAvroSyncMethod) writeToStage(
 		avro.CompressNone, qvalue.QDWHTypeBigQuery)
 	if s.gcsBucket != "" {
 		bucket := s.connector.storageClient.Bucket(s.gcsBucket)
-		avroFilePath = fmt.Sprintf("%s/%s.avro.snappy", objectFolder, syncID)
+		avroFilePath = fmt.Sprintf("%s/%s.avro", objectFolder, syncID)
 		obj := bucket.Object(avroFilePath)
 		w := obj.NewWriter(s.connector.ctx)
 
@@ -346,7 +346,7 @@ func (s *QRepAvroSyncMethod) writeToStage(
 			return 0, fmt.Errorf("failed to create temp dir: %w", err)
 		}
 
-		avroFilePath = fmt.Sprintf("%s/%s.avro.snappy", tmpDir, syncID)
+		avroFilePath = fmt.Sprintf("%s/%s.avro", tmpDir, syncID)
 		log.WithFields(log.Fields{
 			"batchOrPartitionID": syncID,
 		}).Infof("writing records to local file %s", avroFilePath)
diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go
index dd4954284b..90fbb552dc 100644
--- a/flow/e2e/bigquery/peer_flow_bq_test.go
+++ b/flow/e2e/bigquery/peer_flow_bq_test.go
@@ -253,7 +253,7 @@ func (s *PeerFlowE2ETestSuiteBQ) Test_Complete_Simple_Flow_BQ() {
 	s.NoError(err)
 
 	limits := peerflow.CDCFlowLimits{
-		TotalSyncFlows: 3,
+		TotalSyncFlows: 2,
 		MaxBatchSize:   100,
 	}
 
diff --git a/flow/e2e/bigquery/qrep_flow_bq_test.go b/flow/e2e/bigquery/qrep_flow_bq_test.go
index e110b451da..8a97c3b85b 100644
--- a/flow/e2e/bigquery/qrep_flow_bq_test.go
+++ b/flow/e2e/bigquery/qrep_flow_bq_test.go
@@ -39,9 +39,9 @@ func (s *PeerFlowE2ETestSuiteBQ) compareTableContentsBQ(tableName string, colsSt
 	// read rows from destination table
 	qualifiedTableName := fmt.Sprintf("`%s.%s`", s.bqHelper.Config.DatasetId, tableName)
-	bqRows, err := s.bqHelper.ExecuteAndProcessQuery(
-		fmt.Sprintf("SELECT %s FROM %s ORDER BY id", colsString, qualifiedTableName),
-	)
+	bqSelQuery := fmt.Sprintf("SELECT %s FROM %s ORDER BY id", colsString, qualifiedTableName)
+	fmt.Printf("running query on bigquery: %s\n", bqSelQuery)
+	bqRows, err := s.bqHelper.ExecuteAndProcessQuery(bqSelQuery)
 	s.NoError(err)
 
 	s.True(pgRows.Equals(bqRows), "rows from source and destination tables are not equal")
diff --git a/flow/e2e/test_utils.go b/flow/e2e/test_utils.go
index 007f502c02..b57ad72214 100644
--- a/flow/e2e/test_utils.go
+++ b/flow/e2e/test_utils.go
@@ -71,7 +71,7 @@ func SetupCDCFlowStatusQuery(env *testsuite.TestWorkflowEnvironment,
 			log.Errorln(err)
 		}
 
-		if state.SetupComplete {
+		if state.SnapshotComplete {
 			break
 		}
 	} else {