From a4e867a2d31a8b904a125b586b5d2c751021dfb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Duchesneau?= Date: Tue, 31 Oct 2023 16:41:26 -0400 Subject: [PATCH 01/10] begin storing metadata to allow undos --- db/db.go | 4 ++ db/dialect.go | 1 + db/dialect_clickhouse.go | 30 ++++++++++ db/dialect_postgres.go | 109 +++++++++++++++++++++++++++++++----- db/dialect_postgres_test.go | 93 ++++++++++++++++++++++++++++++ db/operations.go | 30 +++++++++- db/ops.go | 12 ++-- sinker/sinker.go | 13 +++-- 8 files changed, 264 insertions(+), 28 deletions(-) create mode 100644 db/dialect_postgres_test.go diff --git a/db/db.go b/db/db.go index fd5dcd7..6f7f9fd 100644 --- a/db/db.go +++ b/db/db.go @@ -278,6 +278,10 @@ func (l *Loader) GetCreateCursorsTableSQL(withPostgraphile bool) string { return l.getDialect().GetCreateCursorQuery(l.schema, withPostgraphile) } +func (l *Loader) GetCreateSubstreamsHistoryTableSQL() string { + return l.getDialect().GetCreateSubstreamsHistoryTableQuery(l.schema) +} + func (l *Loader) getDialect() dialect { d, _ := l.tryDialect() return d diff --git a/db/dialect.go b/db/dialect.go index 1014c5b..b0d9e4f 100644 --- a/db/dialect.go +++ b/db/dialect.go @@ -19,6 +19,7 @@ func (e UnknownDriverError) Error() string { type dialect interface { GetCreateCursorQuery(schema string, withPostgraphile bool) string + GetCreateSubstreamsHistoryTableQuery(schema string) string ExecuteSetupScript(ctx context.Context, l *Loader, schemaSql string) error DriverSupportRowsAffected() bool GetUpdateCursorQuery(table, moduleHash string, cursor *sink.Cursor, block_num uint64, block_id string) string diff --git a/db/dialect_clickhouse.go b/db/dialect_clickhouse.go index 835bcab..deffbca 100644 --- a/db/dialect_clickhouse.go +++ b/db/dialect_clickhouse.go @@ -94,6 +94,36 @@ func (d clickhouseDialect) GetCreateCursorQuery(schema string, withPostgraphile `), EscapeIdentifier(schema), EscapeIdentifier("cursors")) } +func (d clickhouseDialect) 
GetCreateSubstreamsHistoryTableQuery(schema string) string { + out := fmt.Sprintf(cli.Dedent(` + create table if not exists %s.%s + ( + table_name text, + id text, + block_num bigint + ) Engine = ReplacingMergeTree() ORDER BY block_num; + create table if not exists %s.%s + ( + table_name text, + id text, + prev_value text, + block_num bigint + ) Engine = ReplacingMergeTree() ORDER BY block_num; + create table if not exists %s.%s + ( + table_name text, + id text, + prev_value text, + block_num bigint + ) Engine = ReplacingMergeTree() ORDER BY block_num; + `), + EscapeIdentifier(schema), EscapeIdentifier("inserts_history"), + EscapeIdentifier(schema), EscapeIdentifier("updates_history"), + EscapeIdentifier(schema), EscapeIdentifier("deletes_history"), + ) + return out +} + func (d clickhouseDialect) ExecuteSetupScript(ctx context.Context, l *Loader, schemaSql string) error { for _, query := range strings.Split(schemaSql, ";") { if len(strings.TrimSpace(query)) == 0 { diff --git a/db/dialect_postgres.go b/db/dialect_postgres.go index 3b2599a..965bb28 100644 --- a/db/dialect_postgres.go +++ b/db/dialect_postgres.go @@ -29,7 +29,7 @@ func (d postgresDialect) Flush(tx *sql.Tx, ctx context.Context, l *Loader, outpu for entryPair := entries.Oldest(); entryPair != nil; entryPair = entryPair.Next() { entry := entryPair.Value - query, err := d.prepareStatement(entry) + query, err := d.prepareStatement(l.schema, entry) if err != nil { return 0, fmt.Errorf("failed to prepare statement: %w", err) } @@ -65,6 +65,36 @@ func (d postgresDialect) GetCreateCursorQuery(schema string, withPostgraphile bo return out } +func (d postgresDialect) GetCreateSubstreamsHistoryTableQuery(schema string) string { + out := fmt.Sprintf(cli.Dedent(` + create table if not exists %s + ( + table_name text, + id text, + block_num bigint + ); + create table if not exists %s + ( + table_name text, + id text, + prev_value text, + block_num bigint + ); + create table if not exists %s + ( + table_name text, + 
id text, + prev_value text, + block_num bigint + ); + `), + d.insertsTable(schema), + d.updatesTable(schema), + d.deletesTable(schema), + ) + return out +} + func (d postgresDialect) ExecuteSetupScript(ctx context.Context, l *Loader, schemaSql string) error { if _, err := l.ExecContext(ctx, schemaSql); err != nil { return fmt.Errorf("exec schema: %w", err) @@ -90,7 +120,57 @@ func (d postgresDialect) OnlyInserts() bool { return false } -func (d *postgresDialect) prepareStatement(o *Operation) (string, error) { +func (d postgresDialect) insertsTable(schema string) string { + return fmt.Sprintf("%s.%s", EscapeIdentifier(schema), EscapeIdentifier("inserts_history")) +} + +func (d postgresDialect) saveInsert(schema string, table string, primaryKey map[string]string, blockNum uint64) string { + return fmt.Sprintf(`INSERT INTO %s (table_name, id, block_num) values (%s, %s, %d);`, + d.insertsTable(schema), + escapeStringValue(table), + escapeStringValue(primaryKeyToJSON(primaryKey)), + blockNum, + ) +} + +func (d postgresDialect) updatesTable(schema string) string { + return fmt.Sprintf("%s.%s", EscapeIdentifier(schema), EscapeIdentifier("updates_history")) +} + +func (d postgresDialect) saveUpdate(schema string, table string, primaryKey map[string]string, blockNum uint64) string { + return d.saveRow(table, d.updatesTable(schema), primaryKey, blockNum) +} + +func (d postgresDialect) deletesTable(schema string) string { + return fmt.Sprintf("%s.%s", EscapeIdentifier(schema), EscapeIdentifier("deletes_history")) +} + +func (d postgresDialect) saveDelete(schema string, table string, primaryKey map[string]string, blockNum uint64) string { + return d.saveRow(table, d.deletesTable(schema), primaryKey, blockNum) +} + +func (d postgresDialect) saveRow(table string, targetTable string, primaryKey map[string]string, blockNum uint64) string { + // insert into deletes_history (table_name, id, prev_value, block_num) + // select 'ownership_transferred', + // 
'["evt_tx_hash":"00006614dade7f56557b84e5fe674a264a50e83eec52ccec62c9fff4c2de4a2a","evt_index":"132"]', + // row_to_json(ownership_transferred), + // 12345678 from ownership_transferred + // where evt_tx_hash = '22199329b0aa1aa68902a78e3b32ca327c872fab166c7a2838273de6ad383eba' and evt_index = 249 + + return fmt.Sprintf(`INSERT INTO %s (table_name, id, prev_value, block_num) + SELECT %s, %s, row_to_json(%s), %d + FROM %s + WHERE %s`, + + targetTable, + escapeStringValue(table), escapeStringValue(primaryKeyToJSON(primaryKey)), EscapeIdentifier(table), blockNum, + EscapeIdentifier(table), + getPrimaryKeyWhereClause(primaryKey), + ) + +} + +func (d *postgresDialect) prepareStatement(schema string, o *Operation) (string, error) { var columns, values []string if o.opType == OperationTypeInsert || o.opType == OperationTypeUpdate { var err error @@ -109,11 +189,11 @@ func (d *postgresDialect) prepareStatement(o *Operation) (string, error) { switch o.opType { case OperationTypeInsert: - return fmt.Sprintf("INSERT INTO %s (%s) VALUES (%s)", + return fmt.Sprintf("INSERT INTO %s (%s) VALUES (%s);", o.table.identifier, strings.Join(columns, ","), strings.Join(values, ","), - ), nil + ) + d.saveInsert(schema, o.table.identifier, o.primaryKey, o.blockNum), nil case OperationTypeUpdate: updates := make([]string, len(columns)) @@ -122,18 +202,21 @@ func (d *postgresDialect) prepareStatement(o *Operation) (string, error) { } primaryKeySelector := getPrimaryKeyWhereClause(o.primaryKey) - return fmt.Sprintf("UPDATE %s SET %s WHERE %s", - o.table.identifier, - strings.Join(updates, ", "), - primaryKeySelector, - ), nil + + return d.saveUpdate(schema, o.table.identifier, o.primaryKey, o.blockNum) + + fmt.Sprintf("UPDATE %s SET %s WHERE %s", + o.table.identifier, + strings.Join(updates, ", "), + primaryKeySelector, + ), nil case OperationTypeDelete: primaryKeyWhereClause := getPrimaryKeyWhereClause(o.primaryKey) - return fmt.Sprintf("DELETE FROM %s WHERE %s", - o.table.identifier, - 
primaryKeyWhereClause, - ), nil + return d.saveDelete(schema, o.table.identifier, o.primaryKey, o.blockNum) + + fmt.Sprintf("DELETE FROM %s WHERE %s", + o.table.identifier, + primaryKeyWhereClause, + ), nil default: panic(fmt.Errorf("unknown operation type %q", o.opType)) diff --git a/db/dialect_postgres_test.go b/db/dialect_postgres_test.go new file mode 100644 index 0000000..75368f8 --- /dev/null +++ b/db/dialect_postgres_test.go @@ -0,0 +1,93 @@ +package db + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestPrimaryKeyToJSON(t *testing.T) { + + tests := []struct { + name string + keys map[string]string + expect string + }{ + { + name: "single key", + keys: map[string]string{ + "id": "0xdeadbeef", + }, + expect: `{"id":"0xdeadbeef"}`, + }, + { + name: "two keys", + keys: map[string]string{ + "hash": "0xdeadbeef", + "idx": "5", + }, + expect: `{"hash":"0xdeadbeef","idx":"5"}`, + }, + { + name: "determinism", + keys: map[string]string{ + "bbb": "1", + "ccc": "2", + "aaa": "3", + "ddd": "4", + }, + expect: `{"aaa":"3","bbb":"1","ccc":"2","ddd":"4"}`, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + jsonKey := primaryKeyToJSON(test.keys) + assert.Equal(t, test.expect, jsonKey) + }) + } + +} + +func TestJSONToPrimaryKey(t *testing.T) { + + tests := []struct { + name string + in string + expect map[string]string + }{ + { + name: "single key", + in: `{"id":"0xdeadbeef"}`, + expect: map[string]string{ + "id": "0xdeadbeef", + }, + }, + { + name: "two keys", + in: `{"hash":"0xdeadbeef","idx":"5"}`, + expect: map[string]string{ + "hash": "0xdeadbeef", + "idx": "5", + }, + }, + { + name: "determinism", + in: `{"aaa":"3","bbb":"1","ccc":"2","ddd":"4"}`, + expect: map[string]string{ + "bbb": "1", + "ccc": "2", + "aaa": "3", + "ddd": "4", + }, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + out, err := jsonToPrimaryKey(test.in) + 
require.NoError(t, err) + assert.Equal(t, test.expect, out) + }) + } + +} diff --git a/db/operations.go b/db/operations.go index 5acb356..d2c4e12 100644 --- a/db/operations.go +++ b/db/operations.go @@ -1,6 +1,7 @@ package db import ( + "encoding/json" "fmt" "reflect" "regexp" @@ -27,35 +28,39 @@ type Operation struct { opType OperationType primaryKey map[string]string data map[string]string + blockNum uint64 } func (o *Operation) String() string { return fmt.Sprintf("%s/%s (%s)", o.table.identifier, createRowUniqueID(o.primaryKey), strings.ToLower(string(o.opType))) } -func (l *Loader) newInsertOperation(table *TableInfo, primaryKey map[string]string, data map[string]string) *Operation { +func (l *Loader) newInsertOperation(table *TableInfo, primaryKey map[string]string, data map[string]string, blockNum uint64) *Operation { return &Operation{ table: table, opType: OperationTypeInsert, primaryKey: primaryKey, data: data, + blockNum: blockNum, } } -func (l *Loader) newUpdateOperation(table *TableInfo, primaryKey map[string]string, data map[string]string) *Operation { +func (l *Loader) newUpdateOperation(table *TableInfo, primaryKey map[string]string, data map[string]string, blockNum uint64) *Operation { return &Operation{ table: table, opType: OperationTypeUpdate, primaryKey: primaryKey, data: data, + blockNum: blockNum, } } -func (l *Loader) newDeleteOperation(table *TableInfo, primaryKey map[string]string) *Operation { +func (l *Loader) newDeleteOperation(table *TableInfo, primaryKey map[string]string, blockNum uint64) *Operation { return &Operation{ table: table, opType: OperationTypeDelete, primaryKey: primaryKey, + blockNum: blockNum, } } @@ -88,3 +93,22 @@ func escapeStringValue(valueToEscape string) string { return `'` + valueToEscape + `'` } + +// to store in an history table +func primaryKeyToJSON(primaryKey map[string]string) string { + m, err := json.Marshal(primaryKey) + if err != nil { + panic(err) // should never happen with map[string]string + } + 
return string(m) +} + +// to store in an history table +func jsonToPrimaryKey(in string) (map[string]string, error) { + out := make(map[string]string) + err := json.Unmarshal([]byte(in), &out) + if err != nil { + return nil, err + } + return out, nil +} diff --git a/db/ops.go b/db/ops.go index 7d8fcc7..4ba051e 100644 --- a/db/ops.go +++ b/db/ops.go @@ -10,7 +10,7 @@ import ( // Insert a row in the DB, it is assumed the table exists, you can do a // check before with HasTable() -func (l *Loader) Insert(tableName string, primaryKey map[string]string, data map[string]string) error { +func (l *Loader) Insert(tableName string, primaryKey map[string]string, data map[string]string, blockNum uint64) error { uniqueID := createRowUniqueID(primaryKey) if l.tracer.Enabled() { @@ -47,7 +47,7 @@ func (l *Loader) Insert(tableName string, primaryKey map[string]string, data map } } - entry.Set(uniqueID, l.newInsertOperation(table, primaryKey, data)) + entry.Set(uniqueID, l.newInsertOperation(table, primaryKey, data, blockNum)) l.entriesCount++ return nil } @@ -92,7 +92,7 @@ func (l *Loader) GetPrimaryKey(tableName string, pk string) (map[string]string, // Update a row in the DB, it is assumed the table exists, you can do a // check before with HasTable() -func (l *Loader) Update(tableName string, primaryKey map[string]string, data map[string]string) error { +func (l *Loader) Update(tableName string, primaryKey map[string]string, data map[string]string, blockNum uint64) error { if l.getDialect().OnlyInserts() { return fmt.Errorf("update operation is not supported by the current database") } @@ -141,13 +141,13 @@ func (l *Loader) Update(tableName string, primaryKey map[string]string, data map l.logger.Debug("primary key entry never existed for table, adding update operation", zap.String("primary_key", uniqueID), zap.String("table_name", tableName)) } - entry.Set(uniqueID, l.newUpdateOperation(table, primaryKey, data)) + entry.Set(uniqueID, l.newUpdateOperation(table, primaryKey, 
data, blockNum)) return nil } // Delete a row in the DB, it is assumed the table exists, you can do a // check before with HasTable() -func (l *Loader) Delete(tableName string, primaryKey map[string]string) error { +func (l *Loader) Delete(tableName string, primaryKey map[string]string, blockNum uint64) error { if l.getDialect().OnlyInserts() { return fmt.Errorf("delete operation is not supported by the current database") } @@ -188,6 +188,6 @@ func (l *Loader) Delete(tableName string, primaryKey map[string]string) error { l.logger.Debug("adding deleting operation", zap.String("primary_key", uniqueID), zap.String("table_name", tableName)) } - entry.Set(uniqueID, l.newDeleteOperation(table, primaryKey)) + entry.Set(uniqueID, l.newDeleteOperation(table, primaryKey, blockNum)) return nil } diff --git a/sinker/sinker.go b/sinker/sinker.go index 9cc2d31..21d72dd 100644 --- a/sinker/sinker.go +++ b/sinker/sinker.go @@ -96,6 +96,7 @@ func (s *SQLSinker) Run(ctx context.Context) { func (s *SQLSinker) HandleBlockScopedData(ctx context.Context, data *pbsubstreamsrpc.BlockScopedData, isLive *bool, cursor *sink.Cursor) error { output := data.Output + blockNum := cursor.Block().Num() if output.Name != s.OutputModuleName() { return fmt.Errorf("received data from wrong output module, expected to received from %q but got module's output for %q", s.OutputModuleName(), output.Name) @@ -114,11 +115,11 @@ func (s *SQLSinker) HandleBlockScopedData(ctx context.Context, data *pbsubstream return fmt.Errorf("unmarshal database changes: %w", err) } - if err := s.applyDatabaseChanges(dbChanges); err != nil { + if err := s.applyDatabaseChanges(dbChanges, blockNum); err != nil { return fmt.Errorf("apply database changes: %w", err) } - if cursor.Block().Num()%s.batchBlockModulo(data, isLive) == 0 { + if blockNum%s.batchBlockModulo(data, isLive) == 0 { flushStart := time.Now() rowFlushedCount, err := s.loader.Flush(ctx, s.OutputModuleHash(), cursor) if err != nil { @@ -146,7 +147,7 @@ func (s 
*SQLSinker) HandleBlockScopedData(ctx context.Context, data *pbsubstream return nil } -func (s *SQLSinker) applyDatabaseChanges(dbChanges *pbdatabase.DatabaseChanges) error { +func (s *SQLSinker) applyDatabaseChanges(dbChanges *pbdatabase.DatabaseChanges, blockNum uint64) error { for _, change := range dbChanges.TableChanges { if !s.loader.HasTable(change.Table) { return fmt.Errorf( @@ -178,17 +179,17 @@ func (s *SQLSinker) applyDatabaseChanges(dbChanges *pbdatabase.DatabaseChanges) switch change.Operation { case pbdatabase.TableChange_CREATE: - err := s.loader.Insert(change.Table, primaryKeys, changes) + err := s.loader.Insert(change.Table, primaryKeys, changes, blockNum) if err != nil { return fmt.Errorf("database insert: %w", err) } case pbdatabase.TableChange_UPDATE: - err := s.loader.Update(change.Table, primaryKeys, changes) + err := s.loader.Update(change.Table, primaryKeys, changes, blockNum) if err != nil { return fmt.Errorf("database update: %w", err) } case pbdatabase.TableChange_DELETE: - err := s.loader.Delete(change.Table, primaryKeys) + err := s.loader.Delete(change.Table, primaryKeys, blockNum) if err != nil { return fmt.Errorf("database delete: %w", err) } From 4e8647426d26bf195ec8dae1d331bda65f7779b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Duchesneau?= Date: Wed, 1 Nov 2023 14:57:59 -0400 Subject: [PATCH 02/10] add testing, manage finalBlocks for undo --- db/cursor.go | 4 +- db/db.go | 19 ++++ db/dialect.go | 3 +- db/dialect_clickhouse.go | 4 +- db/dialect_postgres.go | 69 ++++++++---- db/flush.go | 6 +- db/operations.go | 44 ++++---- db/ops.go | 12 +- db/testing.go | 85 ++++++++++++++ go.mod | 4 +- go.sum | 8 +- sinker/sinker.go | 20 ++-- sinker/sinker_test.go | 233 +++++++++++++++++++++++++++++++++++++++ 13 files changed, 441 insertions(+), 70 deletions(-) create mode 100644 db/testing.go create mode 100644 sinker/sinker_test.go diff --git a/db/cursor.go b/db/cursor.go index 439054a..a946a8b 100644 --- a/db/cursor.go +++ 
b/db/cursor.go @@ -118,7 +118,7 @@ func (l *Loader) InsertCursor(ctx context.Context, moduleHash string, c *sink.Cu // UpdateCursor updates the active cursor. If no cursor is active and no update occurred, returns // ErrCursorNotFound. If the update was not successful on the database, returns an error. // You can use tx=nil to run the query outside of a transaction. -func (l *Loader) UpdateCursor(ctx context.Context, tx *sql.Tx, moduleHash string, c *sink.Cursor) error { +func (l *Loader) UpdateCursor(ctx context.Context, tx Tx, moduleHash string, c *sink.Cursor) error { _, err := l.runModifiyQuery(ctx, tx, "update", l.getDialect().GetUpdateCursorQuery( l.cursorTable.identifier, moduleHash, c, c.Block().Num(), c.Block().ID(), )) @@ -152,7 +152,7 @@ type sqlExecutor interface { // // If `tx` is nil, we use `l.DB` as the execution context, so an operations happening outside // a transaction. Otherwise, tx is the execution context. -func (l *Loader) runModifiyQuery(ctx context.Context, tx *sql.Tx, action string, query string) (rowsAffected int64, err error) { +func (l *Loader) runModifiyQuery(ctx context.Context, tx Tx, action string, query string) (rowsAffected int64, err error) { var executor sqlExecutor = l.DB if tx != nil { executor = tx diff --git a/db/db.go b/db/db.go index 6f7f9fd..def8047 100644 --- a/db/db.go +++ b/db/db.go @@ -44,6 +44,8 @@ type Loader struct { logger *zap.Logger tracer logging.Tracer + + testTx *TestTx // used for testing: if non-nil, 'loader.BeginTx()' will return this object instead of a real *sql.Tx } func NewLoader( @@ -92,6 +94,23 @@ func NewLoader( return l, nil } +type Tx interface { + Rollback() error + Commit() error + ExecContext(ctx context.Context, query string, args ...any) (sql.Result, error) +} + +func (l *Loader) Begin() (Tx, error) { + return l.BeginTx(context.Background(), nil) +} + +func (l *Loader) BeginTx(ctx context.Context, opts *sql.TxOptions) (Tx, error) { + if l.testTx != nil { + return l.testTx, nil + } + return 
l.DB.BeginTx(ctx, opts) +} + func (l *Loader) FlushInterval() time.Duration { return l.flushInterval } diff --git a/db/dialect.go b/db/dialect.go index b0d9e4f..5a60884 100644 --- a/db/dialect.go +++ b/db/dialect.go @@ -2,7 +2,6 @@ package db import ( "context" - "database/sql" "fmt" sink "github.com/streamingfast/substreams-sink" @@ -24,7 +23,7 @@ type dialect interface { DriverSupportRowsAffected() bool GetUpdateCursorQuery(table, moduleHash string, cursor *sink.Cursor, block_num uint64, block_id string) string ParseDatetimeNormalization(value string) string - Flush(tx *sql.Tx, ctx context.Context, l *Loader, outputModuleHash string, cursor *sink.Cursor) (int, error) + Flush(tx Tx, ctx context.Context, l *Loader, outputModuleHash string, lastFinalBlock uint64) (int, error) OnlyInserts() bool } diff --git a/db/dialect_clickhouse.go b/db/dialect_clickhouse.go index deffbca..e03a9a0 100644 --- a/db/dialect_clickhouse.go +++ b/db/dialect_clickhouse.go @@ -2,7 +2,6 @@ package db import ( "context" - "database/sql" "fmt" "math/big" "reflect" @@ -23,7 +22,7 @@ type clickhouseDialect struct{} // Clickhouse should be used to insert a lot of data in batches. The current official clickhouse // driver doesn't support Transactions for multiple tables. The only way to add in batches is // creating a transaction for a table, adding all rows and commiting it. 
-func (d clickhouseDialect) Flush(tx *sql.Tx, ctx context.Context, l *Loader, outputModuleHash string, cursor *sink.Cursor) (int, error) { +func (d clickhouseDialect) Flush(tx Tx, ctx context.Context, l *Loader, outputModuleHash string, lastFinalBlock uint64) (int, error) { var entryCount int for entriesPair := l.entries.Oldest(); entriesPair != nil; entriesPair = entriesPair.Next() { tableName := entriesPair.Key @@ -77,6 +76,7 @@ func (d clickhouseDialect) Flush(tx *sql.Tx, ctx context.Context, l *Loader, out } entryCount += entries.Len() } + // TODO: implement pruning return entryCount, nil } diff --git a/db/dialect_postgres.go b/db/dialect_postgres.go index 965bb28..d2c6d56 100644 --- a/db/dialect_postgres.go +++ b/db/dialect_postgres.go @@ -2,9 +2,9 @@ package db import ( "context" - "database/sql" "fmt" "reflect" + "sort" "strconv" "strings" "time" @@ -17,7 +17,7 @@ import ( type postgresDialect struct{} -func (d postgresDialect) Flush(tx *sql.Tx, ctx context.Context, l *Loader, outputModuleHash string, cursor *sink.Cursor) (int, error) { +func (d postgresDialect) Flush(tx Tx, ctx context.Context, l *Loader, outputModuleHash string, lastFinalBlock uint64) (int, error) { var rowCount int for entriesPair := l.entries.Oldest(); entriesPair != nil; entriesPair = entriesPair.Next() { tableName := entriesPair.Key @@ -45,9 +45,25 @@ func (d postgresDialect) Flush(tx *sql.Tx, ctx context.Context, l *Loader, outpu rowCount += entries.Len() } + if err := d.pruneReversibleSegment(tx, ctx, l.schema, lastFinalBlock); err != nil { + return 0, err + } + return rowCount, nil } +func (d postgresDialect) pruneReversibleSegment(tx Tx, ctx context.Context, schema string, highestFinalBlock uint64) error { + pruneInserts := fmt.Sprintf(`DELETE FROM %s WHERE block_num <= %d;`, d.insertsTable(schema), highestFinalBlock) + pruneUpdates := fmt.Sprintf(`DELETE FROM %s WHERE block_num <= %d;`, d.updatesTable(schema), highestFinalBlock) + pruneDeletes := fmt.Sprintf(`DELETE FROM %s WHERE 
block_num <= %d;`, d.deletesTable(schema), highestFinalBlock) + query := pruneInserts + pruneUpdates + pruneDeletes + + if _, err := tx.ExecContext(ctx, query); err != nil { + return fmt.Errorf("executing prune query %q: %w", query, err) + } + return nil +} + func (d postgresDialect) GetCreateCursorQuery(schema string, withPostgraphile bool) string { out := fmt.Sprintf(cli.Dedent(` create table if not exists %s.%s @@ -189,11 +205,15 @@ func (d *postgresDialect) prepareStatement(schema string, o *Operation) (string, switch o.opType { case OperationTypeInsert: - return fmt.Sprintf("INSERT INTO %s (%s) VALUES (%s);", + insertQuery := fmt.Sprintf("INSERT INTO %s (%s) VALUES (%s);", o.table.identifier, strings.Join(columns, ","), strings.Join(values, ","), - ) + d.saveInsert(schema, o.table.identifier, o.primaryKey, o.blockNum), nil + ) + if o.reversibleBlockNum != nil { + return d.saveInsert(schema, o.table.identifier, o.primaryKey, *o.reversibleBlockNum) + insertQuery, nil + } + return insertQuery, nil case OperationTypeUpdate: updates := make([]string, len(columns)) @@ -203,20 +223,27 @@ func (d *postgresDialect) prepareStatement(schema string, o *Operation) (string, primaryKeySelector := getPrimaryKeyWhereClause(o.primaryKey) - return d.saveUpdate(schema, o.table.identifier, o.primaryKey, o.blockNum) + - fmt.Sprintf("UPDATE %s SET %s WHERE %s", - o.table.identifier, - strings.Join(updates, ", "), - primaryKeySelector, - ), nil + updateQuery := fmt.Sprintf("UPDATE %s SET %s WHERE %s", + o.table.identifier, + strings.Join(updates, ", "), + primaryKeySelector, + ) + + if o.reversibleBlockNum != nil { + return d.saveUpdate(schema, o.table.identifier, o.primaryKey, *o.reversibleBlockNum) + updateQuery, nil + } + return updateQuery, nil case OperationTypeDelete: primaryKeyWhereClause := getPrimaryKeyWhereClause(o.primaryKey) - return d.saveDelete(schema, o.table.identifier, o.primaryKey, o.blockNum) + - fmt.Sprintf("DELETE FROM %s WHERE %s", - o.table.identifier, - 
primaryKeyWhereClause, - ), nil + deleteQuery := fmt.Sprintf("DELETE FROM %s WHERE %s", + o.table.identifier, + primaryKeyWhereClause, + ) + if o.reversibleBlockNum != nil { + return d.saveDelete(schema, o.table.identifier, o.primaryKey, *o.reversibleBlockNum) + deleteQuery, nil + } + return deleteQuery, nil default: panic(fmt.Errorf("unknown operation type %q", o.opType)) @@ -232,7 +259,14 @@ func (d *postgresDialect) prepareColValues(table *TableInfo, colValues map[strin values = make([]string, len(colValues)) i := 0 - for columnName, value := range colValues { + for colName := range colValues { + columns[i] = colName + i++ + } + sort.Strings(columns) // sorted for determinism in tests + + for i, columnName := range columns { + value := colValues[columnName] columnInfo, found := table.columnsByName[columnName] if !found { return nil, nil, fmt.Errorf("cannot find column %q for table %q (valid columns are %q)", columnName, table.identifier, strings.Join(maps.Keys(table.columnsByName), ", ")) @@ -243,10 +277,7 @@ func (d *postgresDialect) prepareColValues(table *TableInfo, colValues map[strin return nil, nil, fmt.Errorf("getting sql value from table %s for column %q raw value %q: %w", table.identifier, columnName, value, err) } - columns[i] = columnInfo.escapedName values[i] = normalizedValue - - i++ } return } diff --git a/db/flush.go b/db/flush.go index 3c2b5b4..4bee1d4 100644 --- a/db/flush.go +++ b/db/flush.go @@ -10,11 +10,11 @@ import ( "go.uber.org/zap" ) -func (l *Loader) Flush(ctx context.Context, outputModuleHash string, cursor *sink.Cursor) (rowFlushedCount int, err error) { +func (l *Loader) Flush(ctx context.Context, outputModuleHash string, cursor *sink.Cursor, lastFinalBlock uint64) (rowFlushedCount int, err error) { ctx = clickhouse.Context(context.Background(), clickhouse.WithStdAsync(false)) startAt := time.Now() - tx, err := l.DB.BeginTx(ctx, nil) + tx, err := l.BeginTx(ctx, nil) if err != nil { return 0, fmt.Errorf("failed to being db 
transaction: %w", err) } @@ -26,7 +26,7 @@ func (l *Loader) Flush(ctx context.Context, outputModuleHash string, cursor *sin } }() - rowFlushedCount, err = l.getDialect().Flush(tx, ctx, l, outputModuleHash, cursor) + rowFlushedCount, err = l.getDialect().Flush(tx, ctx, l, outputModuleHash, lastFinalBlock) if err != nil { return 0, fmt.Errorf("dialect flush: %w", err) } diff --git a/db/operations.go b/db/operations.go index d2c4e12..34302c7 100644 --- a/db/operations.go +++ b/db/operations.go @@ -24,43 +24,43 @@ const ( ) type Operation struct { - table *TableInfo - opType OperationType - primaryKey map[string]string - data map[string]string - blockNum uint64 + table *TableInfo + opType OperationType + primaryKey map[string]string + data map[string]string + reversibleBlockNum *uint64 // nil if that block is known to be irreversible } func (o *Operation) String() string { return fmt.Sprintf("%s/%s (%s)", o.table.identifier, createRowUniqueID(o.primaryKey), strings.ToLower(string(o.opType))) } -func (l *Loader) newInsertOperation(table *TableInfo, primaryKey map[string]string, data map[string]string, blockNum uint64) *Operation { +func (l *Loader) newInsertOperation(table *TableInfo, primaryKey map[string]string, data map[string]string, reversibleBlockNum *uint64) *Operation { return &Operation{ - table: table, - opType: OperationTypeInsert, - primaryKey: primaryKey, - data: data, - blockNum: blockNum, + table: table, + opType: OperationTypeInsert, + primaryKey: primaryKey, + data: data, + reversibleBlockNum: reversibleBlockNum, } } -func (l *Loader) newUpdateOperation(table *TableInfo, primaryKey map[string]string, data map[string]string, blockNum uint64) *Operation { +func (l *Loader) newUpdateOperation(table *TableInfo, primaryKey map[string]string, data map[string]string, reversibleBlockNum *uint64) *Operation { return &Operation{ - table: table, - opType: OperationTypeUpdate, - primaryKey: primaryKey, - data: data, - blockNum: blockNum, + table: table, + opType: 
OperationTypeUpdate, + primaryKey: primaryKey, + data: data, + reversibleBlockNum: reversibleBlockNum, } } -func (l *Loader) newDeleteOperation(table *TableInfo, primaryKey map[string]string, blockNum uint64) *Operation { +func (l *Loader) newDeleteOperation(table *TableInfo, primaryKey map[string]string, reversibleBlockNum *uint64) *Operation { return &Operation{ - table: table, - opType: OperationTypeDelete, - primaryKey: primaryKey, - blockNum: blockNum, + table: table, + opType: OperationTypeDelete, + primaryKey: primaryKey, + reversibleBlockNum: reversibleBlockNum, } } diff --git a/db/ops.go b/db/ops.go index 4ba051e..5c35619 100644 --- a/db/ops.go +++ b/db/ops.go @@ -10,7 +10,7 @@ import ( // Insert a row in the DB, it is assumed the table exists, you can do a // check before with HasTable() -func (l *Loader) Insert(tableName string, primaryKey map[string]string, data map[string]string, blockNum uint64) error { +func (l *Loader) Insert(tableName string, primaryKey map[string]string, data map[string]string, reversibleBlockNum *uint64) error { uniqueID := createRowUniqueID(primaryKey) if l.tracer.Enabled() { @@ -47,7 +47,7 @@ func (l *Loader) Insert(tableName string, primaryKey map[string]string, data map } } - entry.Set(uniqueID, l.newInsertOperation(table, primaryKey, data, blockNum)) + entry.Set(uniqueID, l.newInsertOperation(table, primaryKey, data, reversibleBlockNum)) l.entriesCount++ return nil } @@ -92,7 +92,7 @@ func (l *Loader) GetPrimaryKey(tableName string, pk string) (map[string]string, // Update a row in the DB, it is assumed the table exists, you can do a // check before with HasTable() -func (l *Loader) Update(tableName string, primaryKey map[string]string, data map[string]string, blockNum uint64) error { +func (l *Loader) Update(tableName string, primaryKey map[string]string, data map[string]string, reversibleBlockNum *uint64) error { if l.getDialect().OnlyInserts() { return fmt.Errorf("update operation is not supported by the current 
database") } @@ -141,13 +141,13 @@ func (l *Loader) Update(tableName string, primaryKey map[string]string, data map l.logger.Debug("primary key entry never existed for table, adding update operation", zap.String("primary_key", uniqueID), zap.String("table_name", tableName)) } - entry.Set(uniqueID, l.newUpdateOperation(table, primaryKey, data, blockNum)) + entry.Set(uniqueID, l.newUpdateOperation(table, primaryKey, data, reversibleBlockNum)) return nil } // Delete a row in the DB, it is assumed the table exists, you can do a // check before with HasTable() -func (l *Loader) Delete(tableName string, primaryKey map[string]string, blockNum uint64) error { +func (l *Loader) Delete(tableName string, primaryKey map[string]string, reversibleBlockNum *uint64) error { if l.getDialect().OnlyInserts() { return fmt.Errorf("delete operation is not supported by the current database") } @@ -188,6 +188,6 @@ func (l *Loader) Delete(tableName string, primaryKey map[string]string, blockNum l.logger.Debug("adding deleting operation", zap.String("primary_key", uniqueID), zap.String("table_name", tableName)) } - entry.Set(uniqueID, l.newDeleteOperation(table, primaryKey, blockNum)) + entry.Set(uniqueID, l.newDeleteOperation(table, primaryKey, reversibleBlockNum)) return nil } diff --git a/db/testing.go b/db/testing.go new file mode 100644 index 0000000..2da2a98 --- /dev/null +++ b/db/testing.go @@ -0,0 +1,85 @@ +package db + +import ( + "context" + "database/sql" + + "github.com/streamingfast/logging" + "go.uber.org/zap" +) + +func NewTestLoader( + zlog *zap.Logger, + tracer logging.Tracer, + schema string, + tables map[string]*TableInfo, +) (*Loader, *TestTx) { + + loader, err := NewLoader("psql://x:5432/x", 0, OnModuleHashMismatchIgnore, zlog, tracer) + if err != nil { + panic(err) + } + loader.testTx = &TestTx{} + loader.tables = tables + loader.schema = schema + loader.cursorTable = tables["cursors"] + return loader, loader.testTx + +} + +func TestTables(schema string) 
map[string]*TableInfo { + return map[string]*TableInfo{ + "xfer": mustNewTableInfo(schema, "xfer", []string{"id"}, map[string]*ColumnInfo{ + "id": NewColumnInfo("id", "text", ""), + "from": NewColumnInfo("from", "text", ""), + "to": NewColumnInfo("to", "text", ""), + }), + "cursors": mustNewTableInfo(schema, "cursors", []string{"id"}, map[string]*ColumnInfo{ + "block_num": NewColumnInfo("id", "int64", ""), + "block_id": NewColumnInfo("from", "text", ""), + "cursor": NewColumnInfo("cursor", "text", ""), + "id": NewColumnInfo("id", "text", ""), + }), + } +} + +func mustNewTableInfo(schema, name string, pkList []string, columnsByName map[string]*ColumnInfo) *TableInfo { + ti, err := NewTableInfo(schema, name, pkList, columnsByName) + if err != nil { + panic(err) + } + return ti +} + +type TestTx struct { + queries []string +} + +func (t *TestTx) Rollback() error { + t.queries = append(t.queries, "ROLLBACK") + return nil +} + +func (t *TestTx) Commit() error { + t.queries = append(t.queries, "COMMIT") + return nil +} + +func (t *TestTx) ExecContext(ctx context.Context, query string, args ...any) (sql.Result, error) { + t.queries = append(t.queries, query) + return &testResult{}, nil +} + +func (t *TestTx) Results() []string { + return t.queries +} + +type testResult struct{} + +func (t *testResult) LastInsertId() (int64, error) { + return 0, nil +} + +func (t *testResult) RowsAffected() (int64, error) { + return 1, nil +} diff --git a/go.mod b/go.mod index 59e667c..bff4b2b 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,7 @@ require ( github.com/spf13/pflag v1.0.5 github.com/spf13/viper v1.15.0 github.com/streamingfast/logging v0.0.0-20230608130331-f22c91403091 - github.com/streamingfast/substreams v1.1.15-0.20231005155216-0f07427759df + github.com/streamingfast/substreams v1.1.18 github.com/streamingfast/substreams-sink v0.3.3-0.20230901183759-218c1d9ec645 github.com/streamingfast/substreams-sink-database-changes v1.1.3 github.com/stretchr/testify v1.8.4 @@ -139,7 
+139,7 @@ require ( github.com/prometheus/common v0.44.0 // indirect github.com/prometheus/procfs v0.11.0 // indirect github.com/streamingfast/atm v0.0.0-20220131151839-18c87005e680 // indirect - github.com/streamingfast/bstream v0.0.2-0.20230731165201-639b4f347707 + github.com/streamingfast/bstream v0.0.2-0.20230829131224-b9272048dc6a github.com/streamingfast/cli v0.0.4-0.20230825151644-8cc84512cd80 github.com/streamingfast/dbin v0.0.0-20210809205249-73d5eca35dc5 // indirect github.com/streamingfast/dgrpc v0.0.0-20230929132851-893fc52687fa // indirect diff --git a/go.sum b/go.sum index c3cdac0..ac6703c 100644 --- a/go.sum +++ b/go.sum @@ -1167,8 +1167,8 @@ github.com/stefanberger/go-pkcs11uri v0.0.0-20201008174630-78d3cae3a980/go.mod h github.com/stoewer/go-strcase v1.2.0/go.mod h1:IBiWB2sKIp3wVVQ3Y035++gc+knqhUQag1KpM8ahLw8= github.com/streamingfast/atm v0.0.0-20220131151839-18c87005e680 h1:fGJnUx0shX9Y312QOlz+/+yLquihXRhNqctJ26jtZZM= github.com/streamingfast/atm v0.0.0-20220131151839-18c87005e680/go.mod h1:iISPGAstbUsPgyC3auLLi7PYUTi9lHv5z0COam0OPOY= -github.com/streamingfast/bstream v0.0.2-0.20230731165201-639b4f347707 h1:hJW+QNNJrR1boQuoEaajlMFjWh0XKt4Fcg33h9hT7Eo= -github.com/streamingfast/bstream v0.0.2-0.20230731165201-639b4f347707/go.mod h1:Njkx972HcZiz0djWBylxqO/eq686eDGr+egQ1lePj3Q= +github.com/streamingfast/bstream v0.0.2-0.20230829131224-b9272048dc6a h1:NeCO5JLz38HRK1uaV1Emo9u5gUSRtmtZZGNK8BKyLIE= +github.com/streamingfast/bstream v0.0.2-0.20230829131224-b9272048dc6a/go.mod h1:Njkx972HcZiz0djWBylxqO/eq686eDGr+egQ1lePj3Q= github.com/streamingfast/cli v0.0.4-0.20230825151644-8cc84512cd80 h1:UxJUTcEVkdZy8N77E3exz0iNlgQuxl4m220GPvzdZ2s= github.com/streamingfast/cli v0.0.4-0.20230825151644-8cc84512cd80/go.mod h1:QxjVH73Lkqk+mP8bndvhMuQDUINfkgsYhdCH/5TJFKI= github.com/streamingfast/dbin v0.0.0-20210809205249-73d5eca35dc5 h1:m/3aIPNXCwZ9m/dfYdOs8ftrS7GJl82ipVr6K2aZiBs= @@ -1195,8 +1195,8 @@ github.com/streamingfast/pbgo v0.0.6-0.20221020131607-255008258d28 
h1:wmQg8T0rIF github.com/streamingfast/pbgo v0.0.6-0.20221020131607-255008258d28/go.mod h1:huKwfgTGFIFZMKSVbD5TywClM7zAeBUG/zePZMqvXQQ= github.com/streamingfast/shutter v1.5.0 h1:NpzDYzj0HVpSiDJVO/FFSL6QIK/YKOxY0gJAtyaTOgs= github.com/streamingfast/shutter v1.5.0/go.mod h1:B/T6efqdeMGbGwjzPS1ToXzYZI4kDzI5/u4I+7qbjY8= -github.com/streamingfast/substreams v1.1.15-0.20231005155216-0f07427759df h1:GPV+0Nn3PMoHzIPKAB5W1l4Nabvo/nmWxHq1UDoHVcQ= -github.com/streamingfast/substreams v1.1.15-0.20231005155216-0f07427759df/go.mod h1:fFJ8YYBXhzKTKBcC7vRQU6xZl/9KAfVfzuEB8C9hUVw= +github.com/streamingfast/substreams v1.1.18 h1:XRASHrXeWMOe5D7NXVbi+c9IcFB8hbbtI2oqdivNpE8= +github.com/streamingfast/substreams v1.1.18/go.mod h1:fFJ8YYBXhzKTKBcC7vRQU6xZl/9KAfVfzuEB8C9hUVw= github.com/streamingfast/substreams-sink v0.3.3-0.20230901183759-218c1d9ec645 h1:ZbYLft0R5hJBLhMFAdp7noAD9YrKC+r0nsU7Z5IDcfM= github.com/streamingfast/substreams-sink v0.3.3-0.20230901183759-218c1d9ec645/go.mod h1:nBPwmsjz+CV0HT5Vmp0XTiu+RjP8CbdhD5u+uC3lo84= github.com/streamingfast/substreams-sink-database-changes v1.1.3 h1:rXeGb/V2mjC8FftumRkMQxG2jtdLfHdLx9UQVUtAqS8= diff --git a/sinker/sinker.go b/sinker/sinker.go index 21d72dd..037834c 100644 --- a/sinker/sinker.go +++ b/sinker/sinker.go @@ -96,7 +96,6 @@ func (s *SQLSinker) Run(ctx context.Context) { func (s *SQLSinker) HandleBlockScopedData(ctx context.Context, data *pbsubstreamsrpc.BlockScopedData, isLive *bool, cursor *sink.Cursor) error { output := data.Output - blockNum := cursor.Block().Num() if output.Name != s.OutputModuleName() { return fmt.Errorf("received data from wrong output module, expected to received from %q but got module's output for %q", s.OutputModuleName(), output.Name) @@ -115,13 +114,13 @@ func (s *SQLSinker) HandleBlockScopedData(ctx context.Context, data *pbsubstream return fmt.Errorf("unmarshal database changes: %w", err) } - if err := s.applyDatabaseChanges(dbChanges, blockNum); err != nil { + if err := 
s.applyDatabaseChanges(dbChanges, data.Clock.Number, data.FinalBlockHeight); err != nil { return fmt.Errorf("apply database changes: %w", err) } - if blockNum%s.batchBlockModulo(data, isLive) == 0 { + if data.Clock.Number%s.batchBlockModulo(data, isLive) == 0 { flushStart := time.Now() - rowFlushedCount, err := s.loader.Flush(ctx, s.OutputModuleHash(), cursor) + rowFlushedCount, err := s.loader.Flush(ctx, s.OutputModuleHash(), cursor, data.FinalBlockHeight) if err != nil { return fmt.Errorf("failed to flush at block %s: %w", cursor.Block(), err) } @@ -147,7 +146,7 @@ func (s *SQLSinker) HandleBlockScopedData(ctx context.Context, data *pbsubstream return nil } -func (s *SQLSinker) applyDatabaseChanges(dbChanges *pbdatabase.DatabaseChanges, blockNum uint64) error { +func (s *SQLSinker) applyDatabaseChanges(dbChanges *pbdatabase.DatabaseChanges, blockNum, finalBlockNum uint64) error { for _, change := range dbChanges.TableChanges { if !s.loader.HasTable(change.Table) { return fmt.Errorf( @@ -177,19 +176,24 @@ func (s *SQLSinker) applyDatabaseChanges(dbChanges *pbdatabase.DatabaseChanges, changes[field.Name] = field.NewValue } + var reversibleBlockNum *uint64 + if blockNum > finalBlockNum { + reversibleBlockNum = &blockNum + } + switch change.Operation { case pbdatabase.TableChange_CREATE: - err := s.loader.Insert(change.Table, primaryKeys, changes, blockNum) + err := s.loader.Insert(change.Table, primaryKeys, changes, reversibleBlockNum) if err != nil { return fmt.Errorf("database insert: %w", err) } case pbdatabase.TableChange_UPDATE: - err := s.loader.Update(change.Table, primaryKeys, changes, blockNum) + err := s.loader.Update(change.Table, primaryKeys, changes, reversibleBlockNum) if err != nil { return fmt.Errorf("database update: %w", err) } case pbdatabase.TableChange_DELETE: - err := s.loader.Delete(change.Table, primaryKeys, blockNum) + err := s.loader.Delete(change.Table, primaryKeys, reversibleBlockNum) if err != nil { return fmt.Errorf("database delete: 
%w", err) } diff --git a/sinker/sinker_test.go b/sinker/sinker_test.go new file mode 100644 index 0000000..6449749 --- /dev/null +++ b/sinker/sinker_test.go @@ -0,0 +1,233 @@ +package sinker + +import ( + "context" + "fmt" + "testing" + + "github.com/streamingfast/bstream" + "github.com/streamingfast/logging" + sink "github.com/streamingfast/substreams-sink" + pbdatabase "github.com/streamingfast/substreams-sink-database-changes/pb/sf/substreams/sink/database/v1" + "github.com/streamingfast/substreams-sink-sql/db" + "github.com/streamingfast/substreams/client" + pbsubstreamsrpc "github.com/streamingfast/substreams/pb/sf/substreams/rpc/v2" + pbsubstreams "github.com/streamingfast/substreams/pb/sf/substreams/v1" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "google.golang.org/protobuf/types/known/anypb" + + _ "github.com/lib/pq" +) + +var T = true +var flushEveryBlock = &T + +func pruneQuery(blockNum uint64) string { + return fmt.Sprintf(`DELETE FROM "testschema"."inserts_history" WHERE block_num <= %d;DELETE FROM "testschema"."updates_history" WHERE block_num <= %d;DELETE FROM "testschema"."deletes_history" WHERE block_num <= %d;`, + blockNum, blockNum, blockNum) +} + +func TestInserts(t *testing.T) { + + logger, tracer := logging.ApplicationLogger("test", "test") + + type event struct { + blockNum uint64 + libNum uint64 + tableChanges []*pbdatabase.TableChange + // undoUpTo uint64 + } + + tests := []struct { + name string + events []event + expectSQL []string + }{ + { + name: "insert final block", + events: []event{ + { + blockNum: 10, + libNum: 10, + tableChanges: []*pbdatabase.TableChange{insertRowSinglePK("xfer", "1234", "from", "sender1", "to", "receiver1")}, + }, + }, + expectSQL: []string{ + `INSERT INTO "testschema"."xfer" (from,id,to) VALUES ('sender1','1234','receiver1');`, + pruneQuery(10), + `UPDATE "testschema"."cursors" set cursor = 'bN7dsAhRyo44yl_ykkjA36WwLpc_DFtvXwrlIBBBj4r2', block_num = 10, block_id = '10' WHERE 
id = '756e75736564';`, + `COMMIT`, + }, + }, + { + name: "insert two final blocks", + events: []event{ + { + blockNum: 10, + libNum: 10, + tableChanges: []*pbdatabase.TableChange{insertRowSinglePK("xfer", "1234", "from", "sender1", "to", "receiver1")}, + }, + { + blockNum: 11, + libNum: 11, + tableChanges: []*pbdatabase.TableChange{insertRowSinglePK("xfer", "2345", "from", "sender2", "to", "receiver2")}, + }, + }, + expectSQL: []string{ + `INSERT INTO "testschema"."xfer" (from,id,to) VALUES ('sender1','1234','receiver1');`, + pruneQuery(10), + `UPDATE "testschema"."cursors" set cursor = 'bN7dsAhRyo44yl_ykkjA36WwLpc_DFtvXwrlIBBBj4r2', block_num = 10, block_id = '10' WHERE id = '756e75736564';`, + `COMMIT`, + `INSERT INTO "testschema"."xfer" (from,id,to) VALUES ('sender2','2345','receiver2');`, + pruneQuery(11), + `UPDATE "testschema"."cursors" set cursor = 'dR5-m-1v1TQvlVRfIM9SXaWwLpc_DFtuXwrkIBBAj4r3', block_num = 11, block_id = '11' WHERE id = '756e75736564';`, + `COMMIT`, + }, + }, + { + name: "insert two reversible blocks", + events: []event{ + { + blockNum: 10, + libNum: 5, + tableChanges: []*pbdatabase.TableChange{insertRowSinglePK("xfer", "1234", "from", "sender1", "to", "receiver1")}, + }, + { + blockNum: 11, + libNum: 5, + tableChanges: []*pbdatabase.TableChange{insertRowSinglePK("xfer", "2345", "from", "sender2", "to", "receiver2")}, + }, + }, + expectSQL: []string{ + `INSERT INTO "testschema"."inserts_history" (table_name, id, block_num) values ('"testschema"."xfer"', '{"id":"1234"}', 10);` + + `INSERT INTO "testschema"."xfer" (from,id,to) VALUES ('sender1','1234','receiver1');`, + pruneQuery(5), + `UPDATE "testschema"."cursors" set cursor = 'i4tY9gOcWnhKoGjRCl2VUKWwLpcyB1plVAvvLxtE', block_num = 10, block_id = '10' WHERE id = '756e75736564';`, + `COMMIT`, + `INSERT INTO "testschema"."inserts_history" (table_name, id, block_num) values ('"testschema"."xfer"', '{"id":"2345"}', 11);` + + `INSERT INTO "testschema"."xfer" (from,id,to) VALUES 
('sender2','2345','receiver2');`, + pruneQuery(5), + `UPDATE "testschema"."cursors" set cursor = 'Euaqz6R-ylLG0gbdej7Me6WwLpcyB1tlVArvLxtE', block_num = 11, block_id = '11' WHERE id = '756e75736564';`, + `COMMIT`, + }, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + ctx := context.Background() + l, tx := db.NewTestLoader( + logger, + tracer, + "testschema", + db.TestTables("testschema"), + ) + s, err := sink.New(sink.SubstreamsModeDevelopment, testPackage, testPackage.Modules.Modules[0], []byte("unused"), testClientConfig, logger, nil) + require.NoError(t, err) + sinker, _ := New(s, l, logger, nil) + + for _, evt := range test.events { + err := sinker.HandleBlockScopedData( + ctx, + blockScopedData("db_out", evt.tableChanges, evt.blockNum, evt.libNum), + flushEveryBlock, sink.MustNewCursor(simpleCursor(evt.blockNum, evt.libNum)), + ) + require.NoError(t, err) + } + + results := tx.Results() + assert.Equal(t, test.expectSQL, results) + + }) + } + +} + +var testPackage = &pbsubstreams.Package{ + Modules: &pbsubstreams.Modules{ + Modules: []*pbsubstreams.Module{ + { + Name: "db_out", + Kind: &pbsubstreams.Module_KindMap_{}, + Output: &pbsubstreams.Module_Output{ + Type: "proto:sf.substreams.sink.database.v1.DatabaseChanges", + }, + }, + }, + }, +} + +var testClientConfig = &client.SubstreamsClientConfig{} + +func getFields(fieldsAndValues ...string) (out []*pbdatabase.Field) { + if len(fieldsAndValues)%2 != 0 { + panic("tableChangeSinglePK needs even number of fieldsAndValues") + } + for i := 0; i < len(fieldsAndValues); i += 2 { + out = append(out, &pbdatabase.Field{ + Name: fieldsAndValues[i], + NewValue: fieldsAndValues[i+1], + }) + } + return +} + +func insertRowSinglePK(table string, pk string, fieldsAndValues ...string) *pbdatabase.TableChange { + return &pbdatabase.TableChange{ + Table: table, + PrimaryKey: &pbdatabase.TableChange_Pk{ + Pk: pk, + }, + Ordinal: 0, + Operation: pbdatabase.TableChange_CREATE, + Fields: 
getFields(fieldsAndValues...), + } +} + +func blockScopedData(module string, changes []*pbdatabase.TableChange, blockNum uint64, finalBlockNum uint64) *pbsubstreamsrpc.BlockScopedData { + mapOutput, err := anypb.New(&pbdatabase.DatabaseChanges{ + TableChanges: changes, + }) + if err != nil { + panic(err) + } + + return &pbsubstreamsrpc.BlockScopedData{ + Output: &pbsubstreamsrpc.MapModuleOutput{ + Name: module, + MapOutput: mapOutput, + }, + Clock: clock(fmt.Sprintf("%d", blockNum), blockNum), + Cursor: simpleCursor(blockNum, finalBlockNum), + FinalBlockHeight: finalBlockNum, + } +} +func mustNewTableInfo(schema, name string, pkList []string, columnsByName map[string]*db.ColumnInfo) *db.TableInfo { + ti, err := db.NewTableInfo(schema, name, pkList, columnsByName) + if err != nil { + panic(err) + } + return ti +} + +func clock(id string, num uint64) *pbsubstreams.Clock { + return &pbsubstreams.Clock{Id: id, Number: num} +} + +func simpleCursor(num, finalNum uint64) string { + id := fmt.Sprintf("%d", num) + finalID := fmt.Sprintf("%d", finalNum) + blk := bstream.NewBlockRef(id, num) + lib := bstream.NewBlockRef(finalID, finalNum) + step := bstream.StepNew + if id == finalID { + step = bstream.StepNewIrreversible + } + + return (&bstream.Cursor{ + Step: step, + Block: blk, + LIB: lib, + HeadBlock: blk, + }).ToOpaque() +} From 6f98a0f701cd025b517044ba9f9ec10068412d61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Duchesneau?= Date: Thu, 2 Nov 2023 10:38:21 -0400 Subject: [PATCH 03/10] add Revert boilerplate, still missing actual implementation --- db/db.go | 1 + db/dialect.go | 1 + db/dialect_clickhouse.go | 5 ++++ db/dialect_postgres.go | 6 ++++ db/flush.go | 16 ++++++++++ db/testing.go | 16 ++++++++++ sinker/sinker.go | 2 +- sinker/sinker_test.go | 65 +++++++++++++++++++++++++++------------- 8 files changed, 91 insertions(+), 21 deletions(-) diff --git a/db/db.go b/db/db.go index def8047..9111be6 100644 --- a/db/db.go +++ b/db/db.go @@ -98,6 +98,7 @@ type 
Tx interface { Rollback() error Commit() error ExecContext(ctx context.Context, query string, args ...any) (sql.Result, error) + QueryContext(ctx context.Context, query string, args ...any) (*sql.Rows, error) } func (l *Loader) Begin() (Tx, error) { diff --git a/db/dialect.go b/db/dialect.go index 5a60884..b96ef6c 100644 --- a/db/dialect.go +++ b/db/dialect.go @@ -24,6 +24,7 @@ type dialect interface { GetUpdateCursorQuery(table, moduleHash string, cursor *sink.Cursor, block_num uint64, block_id string) string ParseDatetimeNormalization(value string) string Flush(tx Tx, ctx context.Context, l *Loader, outputModuleHash string, lastFinalBlock uint64) (int, error) + Revert(tx Tx, ctx context.Context, l *Loader, lastValidFinalBlock uint64) error OnlyInserts() bool } diff --git a/db/dialect_clickhouse.go b/db/dialect_clickhouse.go index e03a9a0..0d9770c 100644 --- a/db/dialect_clickhouse.go +++ b/db/dialect_clickhouse.go @@ -81,6 +81,11 @@ func (d clickhouseDialect) Flush(tx Tx, ctx context.Context, l *Loader, outputMo return entryCount, nil } +func (d clickhouseDialect) Revert(tx Tx, ctx context.Context, l *Loader, lastValidFinalBlock uint64) error { + // TODO implement revert + return nil +} + func (d clickhouseDialect) GetCreateCursorQuery(schema string, withPostgraphile bool) string { _ = withPostgraphile // TODO: see if this can work return fmt.Sprintf(cli.Dedent(` diff --git a/db/dialect_postgres.go b/db/dialect_postgres.go index d2c6d56..2a2d249 100644 --- a/db/dialect_postgres.go +++ b/db/dialect_postgres.go @@ -17,6 +17,11 @@ import ( type postgresDialect struct{} +func (d postgresDialect) Revert(tx Tx, ctx context.Context, l *Loader, lastValidFinalBlock uint64) error { + // query := "" + return nil +} + func (d postgresDialect) Flush(tx Tx, ctx context.Context, l *Loader, outputModuleHash string, lastFinalBlock uint64) (int, error) { var rowCount int for entriesPair := l.entries.Oldest(); entriesPair != nil; entriesPair = entriesPair.Next() { @@ -278,6 +283,7 
@@ func (d *postgresDialect) prepareColValues(table *TableInfo, colValues map[strin } values[i] = normalizedValue + columns[i] = columnInfo.escapedName // escape the column name } return } diff --git a/db/flush.go b/db/flush.go index 4bee1d4..46e6acc 100644 --- a/db/flush.go +++ b/db/flush.go @@ -46,6 +46,22 @@ func (l *Loader) Flush(ctx context.Context, outputModuleHash string, cursor *sin return rowFlushedCount, nil } +func (l *Loader) Revert(ctx context.Context, cursor *sink.Cursor, lastFinalBlock uint64) error { + tx, err := l.BeginTx(ctx, nil) + if err != nil { + return fmt.Errorf("failed to being db transaction: %w", err) + } + defer func() { + if err != nil { + if err := tx.Rollback(); err != nil { + l.logger.Warn("failed to rollback transaction", zap.Error(err)) + } + } + }() + + return l.getDialect().Revert(tx, ctx, l, lastFinalBlock) +} + func (l *Loader) reset() { for entriesPair := l.entries.Oldest(); entriesPair != nil; entriesPair = entriesPair.Next() { l.entries.Set(entriesPair.Key, NewOrderedMap[string, *Operation]()) diff --git a/db/testing.go b/db/testing.go index 2da2a98..a92181d 100644 --- a/db/testing.go +++ b/db/testing.go @@ -3,6 +3,7 @@ package db import ( "context" "database/sql" + "fmt" "github.com/streamingfast/logging" "go.uber.org/zap" @@ -53,6 +54,7 @@ func mustNewTableInfo(schema, name string, pkList []string, columnsByName map[st type TestTx struct { queries []string + next []*sql.Rows } func (t *TestTx) Rollback() error { @@ -74,6 +76,20 @@ func (t *TestTx) Results() []string { return t.queries } +func (t *TestTx) AppendResp(in *sql.Rows) { + t.next = append(t.next, in) + +} + +func (t *TestTx) QueryContext(ctx context.Context, query string, args ...any) (out *sql.Rows, err error) { + if len(t.next) == 0 { + return nil, fmt.Errorf("testTx queried but no responses were set") + } + + out, t.next = t.next[0], t.next[1:] + return out, nil +} + type testResult struct{} func (t *testResult) LastInsertId() (int64, error) { diff --git 
a/sinker/sinker.go b/sinker/sinker.go index 037834c..36b2a7d 100644 --- a/sinker/sinker.go +++ b/sinker/sinker.go @@ -205,7 +205,7 @@ func (s *SQLSinker) applyDatabaseChanges(dbChanges *pbdatabase.DatabaseChanges, } func (s *SQLSinker) HandleBlockUndoSignal(ctx context.Context, data *pbsubstreamsrpc.BlockUndoSignal, cursor *sink.Cursor) error { - return fmt.Errorf("received undo signal but there is no handling of undo, this is because you used `--undo-buffer-size=0` which is invalid right now") + return s.loader.Revert(ctx, cursor, data.LastValidBlock.Number) } func (s *SQLSinker) batchBlockModulo(blockData *pbsubstreamsrpc.BlockScopedData, isLive *bool) uint64 { diff --git a/sinker/sinker_test.go b/sinker/sinker_test.go index 6449749..586718c 100644 --- a/sinker/sinker_test.go +++ b/sinker/sinker_test.go @@ -20,14 +20,6 @@ import ( _ "github.com/lib/pq" ) -var T = true -var flushEveryBlock = &T - -func pruneQuery(blockNum uint64) string { - return fmt.Sprintf(`DELETE FROM "testschema"."inserts_history" WHERE block_num <= %d;DELETE FROM "testschema"."updates_history" WHERE block_num <= %d;DELETE FROM "testschema"."deletes_history" WHERE block_num <= %d;`, - blockNum, blockNum, blockNum) -} - func TestInserts(t *testing.T) { logger, tracer := logging.ApplicationLogger("test", "test") @@ -36,7 +28,7 @@ func TestInserts(t *testing.T) { blockNum uint64 libNum uint64 tableChanges []*pbdatabase.TableChange - // undoUpTo uint64 + undoSignal bool } tests := []struct { @@ -54,8 +46,8 @@ func TestInserts(t *testing.T) { }, }, expectSQL: []string{ - `INSERT INTO "testschema"."xfer" (from,id,to) VALUES ('sender1','1234','receiver1');`, - pruneQuery(10), + `INSERT INTO "testschema"."xfer" ("from","id","to") VALUES ('sender1','1234','receiver1');`, + pruneBelow(10), `UPDATE "testschema"."cursors" set cursor = 'bN7dsAhRyo44yl_ykkjA36WwLpc_DFtvXwrlIBBBj4r2', block_num = 10, block_id = '10' WHERE id = '756e75736564';`, `COMMIT`, }, @@ -75,18 +67,18 @@ func TestInserts(t *testing.T) 
{ }, }, expectSQL: []string{ - `INSERT INTO "testschema"."xfer" (from,id,to) VALUES ('sender1','1234','receiver1');`, - pruneQuery(10), + `INSERT INTO "testschema"."xfer" ("from","id","to") VALUES ('sender1','1234','receiver1');`, + pruneBelow(10), `UPDATE "testschema"."cursors" set cursor = 'bN7dsAhRyo44yl_ykkjA36WwLpc_DFtvXwrlIBBBj4r2', block_num = 10, block_id = '10' WHERE id = '756e75736564';`, `COMMIT`, - `INSERT INTO "testschema"."xfer" (from,id,to) VALUES ('sender2','2345','receiver2');`, - pruneQuery(11), + `INSERT INTO "testschema"."xfer" ("from","id","to") VALUES ('sender2','2345','receiver2');`, + pruneBelow(11), `UPDATE "testschema"."cursors" set cursor = 'dR5-m-1v1TQvlVRfIM9SXaWwLpc_DFtuXwrkIBBAj4r3', block_num = 11, block_id = '11' WHERE id = '756e75736564';`, `COMMIT`, }, }, { - name: "insert two reversible blocks", + name: "insert two reversible blocks, then UNDO last", events: []event{ { blockNum: 10, @@ -98,18 +90,28 @@ func TestInserts(t *testing.T) { libNum: 5, tableChanges: []*pbdatabase.TableChange{insertRowSinglePK("xfer", "2345", "from", "sender2", "to", "receiver2")}, }, + { + blockNum: 10, // undo everything above 10 + libNum: 5, + undoSignal: true, + }, }, expectSQL: []string{ `INSERT INTO "testschema"."inserts_history" (table_name, id, block_num) values ('"testschema"."xfer"', '{"id":"1234"}', 10);` + - `INSERT INTO "testschema"."xfer" (from,id,to) VALUES ('sender1','1234','receiver1');`, - pruneQuery(5), + `INSERT INTO "testschema"."xfer" ("from","id","to") VALUES ('sender1','1234','receiver1');`, + pruneBelow(5), `UPDATE "testschema"."cursors" set cursor = 'i4tY9gOcWnhKoGjRCl2VUKWwLpcyB1plVAvvLxtE', block_num = 10, block_id = '10' WHERE id = '756e75736564';`, `COMMIT`, `INSERT INTO "testschema"."inserts_history" (table_name, id, block_num) values ('"testschema"."xfer"', '{"id":"2345"}', 11);` + - `INSERT INTO "testschema"."xfer" (from,id,to) VALUES ('sender2','2345','receiver2');`, - pruneQuery(5), + `INSERT INTO "testschema"."xfer" 
("from","id","to") VALUES ('sender2','2345','receiver2');`, + pruneBelow(5), `UPDATE "testschema"."cursors" set cursor = 'Euaqz6R-ylLG0gbdej7Me6WwLpcyB1tlVArvLxtE', block_num = 11, block_id = '11' WHERE id = '756e75736564';`, `COMMIT`, + // UNDO above block 10 + `DELETE FROM "testschema"."xfer" WHERE "id" = "2345";`, + `UPDATE "testschema"."cursors" set cursor = 'i4tY9gOcWnhKoGjRCl2VUKWwLpcyB1plVAvvLxtE', block_num = 10, block_id = '10' WHERE id = '756e75736564';`, + pruneAbove(10), + `COMMIT`, }, }, } @@ -127,6 +129,16 @@ func TestInserts(t *testing.T) { sinker, _ := New(s, l, logger, nil) for _, evt := range test.events { + if evt.undoSignal { + cursor := simpleCursor(evt.blockNum, evt.libNum) + err := sinker.HandleBlockUndoSignal(ctx, &pbsubstreamsrpc.BlockUndoSignal{ + LastValidBlock: &pbsubstreams.BlockRef{Id: fmt.Sprintf("%d", evt.blockNum), Number: evt.blockNum}, + LastValidCursor: cursor, + }, sink.MustNewCursor(cursor)) + require.NoError(t, err) + continue + } + err := sinker.HandleBlockScopedData( ctx, blockScopedData("db_out", evt.tableChanges, evt.blockNum, evt.libNum), @@ -143,6 +155,9 @@ func TestInserts(t *testing.T) { } +var T = true +var flushEveryBlock = &T + var testPackage = &pbsubstreams.Package{ Modules: &pbsubstreams.Modules{ Modules: []*pbsubstreams.Module{ @@ -159,6 +174,16 @@ var testPackage = &pbsubstreams.Package{ var testClientConfig = &client.SubstreamsClientConfig{} +func pruneAbove(blockNum uint64) string { + return fmt.Sprintf(`DELETE FROM "testschema"."inserts_history" WHERE block_num > %d;DELETE FROM "testschema"."updates_history" WHERE block_num > %d;DELETE FROM "testschema"."deletes_history" WHERE block_num > %d;`, + blockNum, blockNum, blockNum) +} + +func pruneBelow(blockNum uint64) string { + return fmt.Sprintf(`DELETE FROM "testschema"."inserts_history" WHERE block_num <= %d;DELETE FROM "testschema"."updates_history" WHERE block_num <= %d;DELETE FROM "testschema"."deletes_history" WHERE block_num <= %d;`, + blockNum, 
blockNum, blockNum) +} + func getFields(fieldsAndValues ...string) (out []*pbdatabase.Field) { if len(fieldsAndValues)%2 != 0 { panic("tableChangeSinglePK needs even number of fieldsAndValues") From 03085cb98a0140dec7db21c94676171ba8e442e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Duchesneau?= Date: Thu, 2 Nov 2023 16:19:35 -0400 Subject: [PATCH 04/10] complete the undo for inserts/updates/deletes on postgres --- db/db.go | 18 ++-- db/dialect.go | 2 +- db/dialect_clickhouse.go | 23 +---- db/dialect_postgres.go | 186 ++++++++++++++++++++++++------------ db/dialect_postgres_test.go | 67 +++++++++++++ db/flush.go | 17 +++- db/testing.go | 14 +-- sinker/sinker.go | 2 +- sinker/sinker_test.go | 157 ++++++++++++++++++++++++++---- 9 files changed, 369 insertions(+), 117 deletions(-) diff --git a/db/db.go b/db/db.go index 9111be6..c9b38bf 100644 --- a/db/db.go +++ b/db/db.go @@ -281,25 +281,29 @@ func (l *Loader) SetupFromBytes(ctx context.Context, schemaBytes []byte, withPos return fmt.Errorf("setup cursor table: %w", err) } + if err := l.setupHistoryTable(ctx); err != nil { + return fmt.Errorf("setup history table: %w", err) + } + return nil } func (l *Loader) setupCursorTable(ctx context.Context, withPostgraphile bool) error { _, err := l.ExecContext(ctx, l.GetCreateCursorsTableSQL(withPostgraphile)) + return err +} - if err != nil { - return fmt.Errorf("creating cursor table: %w", err) - } - - return nil +func (l *Loader) setupHistoryTable(ctx context.Context) error { + _, err := l.ExecContext(ctx, l.GetCreateHistoryTableSQL()) + return err } func (l *Loader) GetCreateCursorsTableSQL(withPostgraphile bool) string { return l.getDialect().GetCreateCursorQuery(l.schema, withPostgraphile) } -func (l *Loader) GetCreateSubstreamsHistoryTableSQL() string { - return l.getDialect().GetCreateSubstreamsHistoryTableQuery(l.schema) +func (l *Loader) GetCreateHistoryTableSQL() string { + return l.getDialect().GetCreateHistoryQuery(l.schema) } func (l *Loader) 
getDialect() dialect { diff --git a/db/dialect.go b/db/dialect.go index b96ef6c..ed708a7 100644 --- a/db/dialect.go +++ b/db/dialect.go @@ -18,7 +18,7 @@ func (e UnknownDriverError) Error() string { type dialect interface { GetCreateCursorQuery(schema string, withPostgraphile bool) string - GetCreateSubstreamsHistoryTableQuery(schema string) string + GetCreateHistoryQuery(schema string) string ExecuteSetupScript(ctx context.Context, l *Loader, schemaSql string) error DriverSupportRowsAffected() bool GetUpdateCursorQuery(table, moduleHash string, cursor *sink.Cursor, block_num uint64, block_id string) string diff --git a/db/dialect_clickhouse.go b/db/dialect_clickhouse.go index 0d9770c..1b0daa0 100644 --- a/db/dialect_clickhouse.go +++ b/db/dialect_clickhouse.go @@ -99,32 +99,19 @@ func (d clickhouseDialect) GetCreateCursorQuery(schema string, withPostgraphile `), EscapeIdentifier(schema), EscapeIdentifier("cursors")) } -func (d clickhouseDialect) GetCreateSubstreamsHistoryTableQuery(schema string) string { +func (d clickhouseDialect) GetCreateHistoryQuery(schema string) string { out := fmt.Sprintf(cli.Dedent(` create table if not exists %s.%s ( + id SERIAL PRIMARY KEY, + op char, table_name text, - id text, - block_num bigint - ) Engine = ReplacingMergeTree() ORDER BY block_num; - create table if not exists %s.%s - ( - table_name text, - id text, - prev_value text, - block_num bigint - ) Engine = ReplacingMergeTree() ORDER BY block_num; - create table if not exists %s.%s - ( - table_name text, - id text, + pk text, prev_value text, block_num bigint ) Engine = ReplacingMergeTree() ORDER BY block_num; `), - EscapeIdentifier(schema), EscapeIdentifier("inserts_history"), - EscapeIdentifier(schema), EscapeIdentifier("updates_history"), - EscapeIdentifier(schema), EscapeIdentifier("deletes_history"), + EscapeIdentifier(schema), EscapeIdentifier("history"), ) return out } diff --git a/db/dialect_postgres.go b/db/dialect_postgres.go index 2a2d249..def17ed 100644 --- 
a/db/dialect_postgres.go +++ b/db/dialect_postgres.go @@ -2,6 +2,7 @@ package db import ( "context" + "encoding/json" "fmt" "reflect" "sort" @@ -18,8 +19,42 @@ import ( type postgresDialect struct{} func (d postgresDialect) Revert(tx Tx, ctx context.Context, l *Loader, lastValidFinalBlock uint64) error { - // query := "" - return nil + query := fmt.Sprintf(`SELECT (op,table_name,pk,prev_value,block_num) FROM %s WHERE "block_num" > %d ORDER BY "block_num" DESC`, + d.historyTable(l.schema), + lastValidFinalBlock, + ) + + rows, err := tx.QueryContext(ctx, query) + if err != nil { + return err + } + + if rows != nil { // rows will be nil with no error only in testing scenarios + defer rows.Close() + for rows.Next() { + var op string + var table_name string + var pk string + var prev_value string + var block_num uint64 + if err := rows.Scan(&op, &table_name, &pk, &prev_value, &block_num); err != nil { + return err + } + if err := d.revertOp(tx, ctx, op, l.schema, table_name, pk, prev_value, block_num); err != nil { + return err + } + } + if rows.Err() != nil { + return err + } + } + pruneHistory := fmt.Sprintf(`DELETE FROM %s WHERE "block_num" > %d;`, + d.historyTable(l.schema), + lastValidFinalBlock, + ) + + _, err = tx.ExecContext(ctx, pruneHistory) + return err } func (d postgresDialect) Flush(tx Tx, ctx context.Context, l *Loader, outputModuleHash string, lastFinalBlock uint64) (int, error) { @@ -57,12 +92,73 @@ func (d postgresDialect) Flush(tx Tx, ctx context.Context, l *Loader, outputModu return rowCount, nil } -func (d postgresDialect) pruneReversibleSegment(tx Tx, ctx context.Context, schema string, highestFinalBlock uint64) error { - pruneInserts := fmt.Sprintf(`DELETE FROM %s WHERE block_num <= %d;`, d.insertsTable(schema), highestFinalBlock) - pruneUpdates := fmt.Sprintf(`DELETE FROM %s WHERE block_num <= %d;`, d.updatesTable(schema), highestFinalBlock) - pruneDeletes := fmt.Sprintf(`DELETE FROM %s WHERE block_num <= %d;`, d.deletesTable(schema), 
highestFinalBlock) - query := pruneInserts + pruneUpdates + pruneDeletes +func (d postgresDialect) revertOp(tx Tx, ctx context.Context, op, schema, table_name, pk, prev_value string, block_num uint64) error { + + pkmap := make(map[string]string) + if err := json.Unmarshal([]byte(pk), &pkmap); err != nil { + return err + } + switch op { + case "I": + query := fmt.Sprintf(`DELETE FROM %s.%s WHERE %s;`, + EscapeIdentifier(schema), + EscapeIdentifier(table_name), + getPrimaryKeyWhereClause(pkmap), + ) + if _, err := tx.ExecContext(ctx, query); err != nil { + return fmt.Errorf("executing revert query %q: %w", query, err) + } + case "D": + query := fmt.Sprintf(`INSERT INTO %s.%s SELECT * FROM json_populate_record(null:%s.%s,%s);`, + EscapeIdentifier(schema), EscapeIdentifier(table_name), + EscapeIdentifier(schema), EscapeIdentifier(table_name), + escapeStringValue(prev_value), + ) + if _, err := tx.ExecContext(ctx, query); err != nil { + return fmt.Errorf("executing revert query %q: %w", query, err) + } + + case "U": + columns, err := sqlColumnNamesFromJSON(prev_value) + if err != nil { + return err + } + query := fmt.Sprintf(`UPDATE %s.%s SET(%s)=((SELECT %s FROM json_populate_record(null:%s.%s,%s))) WHERE %s;`, + EscapeIdentifier(schema), EscapeIdentifier(table_name), + columns, + columns, + EscapeIdentifier(schema), EscapeIdentifier(table_name), + escapeStringValue(prev_value), + getPrimaryKeyWhereClause(pkmap), + ) + if _, err := tx.ExecContext(ctx, query); err != nil { + return fmt.Errorf("executing revert query %q: %w", query, err) + } + default: + panic("invalid op in revert command") + } + return nil +} + +func sqlColumnNamesFromJSON(in string) (string, error) { + valueMap := make(map[string]string) + if err := json.Unmarshal([]byte(in), &valueMap); err != nil { + return "", err + } + escapedNames := make([]string, len(valueMap)) + i := 0 + for k := range valueMap { + escapedNames[i] = EscapeIdentifier(k) + i++ + } + sort.Strings(escapedNames) + + return 
strings.Join(escapedNames, ","), nil +} + +func (d postgresDialect) pruneReversibleSegment(tx Tx, ctx context.Context, schema string, highestFinalBlock uint64) error { + query := fmt.Sprintf(`DELETE FROM %s WHERE block_num <= %d;`, d.historyTable(schema), highestFinalBlock) if _, err := tx.ExecContext(ctx, query); err != nil { return fmt.Errorf("executing prune query %q: %w", query, err) } @@ -86,32 +182,19 @@ func (d postgresDialect) GetCreateCursorQuery(schema string, withPostgraphile bo return out } -func (d postgresDialect) GetCreateSubstreamsHistoryTableQuery(schema string) string { +func (d postgresDialect) GetCreateHistoryQuery(schema string) string { out := fmt.Sprintf(cli.Dedent(` create table if not exists %s ( - table_name text, - id text, - block_num bigint - ); - create table if not exists %s - ( - table_name text, - id text, - prev_value text, - block_num bigint - ); - create table if not exists %s - ( - table_name text, - id text, - prev_value text, - block_num bigint + id SERIAL PRIMARY KEY, + op char, + table_name text, + pk text, + prev_value text, + block_num bigint ); `), - d.insertsTable(schema), - d.updatesTable(schema), - d.deletesTable(schema), + d.historyTable(schema), ) return out } @@ -141,51 +224,33 @@ func (d postgresDialect) OnlyInserts() bool { return false } -func (d postgresDialect) insertsTable(schema string) string { - return fmt.Sprintf("%s.%s", EscapeIdentifier(schema), EscapeIdentifier("inserts_history")) +func (d postgresDialect) historyTable(schema string) string { + return fmt.Sprintf("%s.%s", EscapeIdentifier(schema), EscapeIdentifier("history")) } func (d postgresDialect) saveInsert(schema string, table string, primaryKey map[string]string, blockNum uint64) string { - return fmt.Sprintf(`INSERT INTO %s (table_name, id, block_num) values (%s, %s, %d);`, - d.insertsTable(schema), + return fmt.Sprintf(`INSERT INTO %s (op,table_name,pk,block_num) values (%s,%s,%s,%d);`, + d.historyTable(schema), + escapeStringValue("I"), 
escapeStringValue(table), escapeStringValue(primaryKeyToJSON(primaryKey)), blockNum, ) } -func (d postgresDialect) updatesTable(schema string) string { - return fmt.Sprintf("%s.%s", EscapeIdentifier(schema), EscapeIdentifier("updates_history")) -} - -func (d postgresDialect) saveUpdate(schema string, table string, primaryKey map[string]string, blockNum uint64) string { - return d.saveRow(table, d.updatesTable(schema), primaryKey, blockNum) -} - -func (d postgresDialect) deletesTable(schema string) string { - return fmt.Sprintf("%s.%s", EscapeIdentifier(schema), EscapeIdentifier("deletes_history")) +func (d postgresDialect) saveUpdate(schema string, escapedTable string, primaryKey map[string]string, blockNum uint64) string { + return d.saveRow("U", schema, escapedTable, primaryKey, blockNum) } -func (d postgresDialect) saveDelete(schema string, table string, primaryKey map[string]string, blockNum uint64) string { - return d.saveRow(table, d.deletesTable(schema), primaryKey, blockNum) +func (d postgresDialect) saveDelete(schema string, escapedTable string, primaryKey map[string]string, blockNum uint64) string { + return d.saveRow("D", schema, escapedTable, primaryKey, blockNum) } -func (d postgresDialect) saveRow(table string, targetTable string, primaryKey map[string]string, blockNum uint64) string { - // insert into deletes_history (table_name, id, prev_value, block_num) - // select 'ownership_transferred', - // '["evt_tx_hash":"00006614dade7f56557b84e5fe674a264a50e83eec52ccec62c9fff4c2de4a2a","evt_index":"132"]', - // row_to_json(ownership_transferred), - // 12345678 from ownership_transferred - // where evt_tx_hash = '22199329b0aa1aa68902a78e3b32ca327c872fab166c7a2838273de6ad383eba' and evt_index = 249 - - return fmt.Sprintf(`INSERT INTO %s (table_name, id, prev_value, block_num) - SELECT %s, %s, row_to_json(%s), %d - FROM %s - WHERE %s`, - - targetTable, - escapeStringValue(table), escapeStringValue(primaryKeyToJSON(primaryKey)), EscapeIdentifier(table), 
blockNum, - EscapeIdentifier(table), +func (d postgresDialect) saveRow(op, schema, escapedTable string, primaryKey map[string]string, blockNum uint64) string { + return fmt.Sprintf(`INSERT INTO %s (op,table_name,pk,prev_value,block_num) SELECT %s,%s,%s,row_to_json(%s),%d FROM %s WHERE %s;`, + d.historyTable(schema), + escapeStringValue(op), escapeStringValue(escapedTable), escapeStringValue(primaryKeyToJSON(primaryKey)), escapedTable, blockNum, + escapedTable, getPrimaryKeyWhereClause(primaryKey), ) @@ -300,6 +365,7 @@ func getPrimaryKeyWhereClause(primaryKey map[string]string) string { for key, value := range primaryKey { reg = append(reg, EscapeIdentifier(key)+" = "+escapeStringValue(value)) } + sort.Strings(reg) return strings.Join(reg[:], " AND ") } diff --git a/db/dialect_postgres_test.go b/db/dialect_postgres_test.go index 75368f8..427ecab 100644 --- a/db/dialect_postgres_test.go +++ b/db/dialect_postgres_test.go @@ -1,6 +1,7 @@ package db import ( + "context" "testing" "github.com/stretchr/testify/assert" @@ -91,3 +92,69 @@ func TestJSONToPrimaryKey(t *testing.T) { } } + +func TestRevertOp(t *testing.T) { + + type row struct { + op string + schema string + table_name string + pk string + prev_value string + } + + tests := []struct { + name string + row row + expect string + }{ + { + name: "rollback insert row", + row: row{ + op: "I", + schema: "testschema", + table_name: "xfer", + pk: `{"id":"2345"}`, + prev_value: "", // unused + }, + expect: `DELETE FROM "testschema"."xfer" WHERE "id" = '2345';`, + }, + { + name: "rollback delete row", + row: row{ + op: "D", + schema: "testschema", + table_name: "xfer", + pk: `{"id":"2345"}`, + prev_value: `{"id":"2345","sender":"0xdead","receiver":"0xbeef"}`, + }, + expect: `INSERT INTO "testschema"."xfer" SELECT * FROM json_populate_record(null:"testschema"."xfer",` + + `'{"id":"2345","sender":"0xdead","receiver":"0xbeef"}');`, + }, + { + name: "rollback update row", + row: row{ + op: "U", + schema: "testschema", + 
table_name: "xfer", + pk: `{"id":"2345"}`, + prev_value: `{"id":"2345","sender":"0xdead","receiver":"0xbeef"}`, + }, + expect: `UPDATE "testschema"."xfer" SET("id","receiver","sender")=((SELECT "id","receiver","sender" FROM json_populate_record(null:"testschema"."xfer",` + + `'{"id":"2345","sender":"0xdead","receiver":"0xbeef"}'))) WHERE "id" = '2345';`, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + tx := &TestTx{} + ctx := context.Background() + pd := postgresDialect{} + + row := test.row + err := pd.revertOp(tx, ctx, row.op, row.schema, row.table_name, row.pk, row.prev_value, 9999) + require.NoError(t, err) + assert.Equal(t, []string{test.expect}, tx.Results()) + }) + } + +} diff --git a/db/flush.go b/db/flush.go index 46e6acc..1da37f8 100644 --- a/db/flush.go +++ b/db/flush.go @@ -46,7 +46,7 @@ func (l *Loader) Flush(ctx context.Context, outputModuleHash string, cursor *sin return rowFlushedCount, nil } -func (l *Loader) Revert(ctx context.Context, cursor *sink.Cursor, lastFinalBlock uint64) error { +func (l *Loader) Revert(ctx context.Context, outputModuleHash string, cursor *sink.Cursor, lastValidBlock uint64) error { tx, err := l.BeginTx(ctx, nil) if err != nil { return fmt.Errorf("failed to being db transaction: %w", err) @@ -59,7 +59,20 @@ func (l *Loader) Revert(ctx context.Context, cursor *sink.Cursor, lastFinalBlock } }() - return l.getDialect().Revert(tx, ctx, l, lastFinalBlock) + if err := l.getDialect().Revert(tx, ctx, l, lastValidBlock); err != nil { + return err + } + + if err := l.UpdateCursor(ctx, tx, outputModuleHash, cursor); err != nil { + return fmt.Errorf("update cursor after revert: %w", err) + } + + if err := tx.Commit(); err != nil { + return fmt.Errorf("failed to commit db transaction: %w", err) + } + + l.logger.Debug("reverted changes to database", zap.Uint64("last_valid_block", lastValidBlock)) + return nil } func (l *Loader) reset() { diff --git a/db/testing.go b/db/testing.go index a92181d..f2372e3 
100644 --- a/db/testing.go +++ b/db/testing.go @@ -3,7 +3,6 @@ package db import ( "context" "database/sql" - "fmt" "github.com/streamingfast/logging" "go.uber.org/zap" @@ -76,18 +75,9 @@ func (t *TestTx) Results() []string { return t.queries } -func (t *TestTx) AppendResp(in *sql.Rows) { - t.next = append(t.next, in) - -} - func (t *TestTx) QueryContext(ctx context.Context, query string, args ...any) (out *sql.Rows, err error) { - if len(t.next) == 0 { - return nil, fmt.Errorf("testTx queried but no responses were set") - } - - out, t.next = t.next[0], t.next[1:] - return out, nil + t.queries = append(t.queries, query) + return nil, nil } type testResult struct{} diff --git a/sinker/sinker.go b/sinker/sinker.go index 36b2a7d..f7da20b 100644 --- a/sinker/sinker.go +++ b/sinker/sinker.go @@ -205,7 +205,7 @@ func (s *SQLSinker) applyDatabaseChanges(dbChanges *pbdatabase.DatabaseChanges, } func (s *SQLSinker) HandleBlockUndoSignal(ctx context.Context, data *pbsubstreamsrpc.BlockUndoSignal, cursor *sink.Cursor) error { - return s.loader.Revert(ctx, cursor, data.LastValidBlock.Number) + return s.loader.Revert(ctx, s.OutputModuleHash(), cursor, data.LastValidBlock.Number) } func (s *SQLSinker) batchBlockModulo(blockData *pbsubstreamsrpc.BlockScopedData, isLive *bool) uint64 { diff --git a/sinker/sinker_test.go b/sinker/sinker_test.go index 586718c..8b17076 100644 --- a/sinker/sinker_test.go +++ b/sinker/sinker_test.go @@ -2,6 +2,7 @@ package sinker import ( "context" + "database/sql" "fmt" "testing" @@ -15,14 +16,20 @@ import ( pbsubstreams "github.com/streamingfast/substreams/pb/sf/substreams/v1" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "go.uber.org/zap" "google.golang.org/protobuf/types/known/anypb" _ "github.com/lib/pq" ) -func TestInserts(t *testing.T) { +var logger *zap.Logger +var tracer logging.Tracer - logger, tracer := logging.ApplicationLogger("test", "test") +func init() { + logger, tracer = logging.ApplicationLogger("test", 
"test") +} + +func TestInserts(t *testing.T) { type event struct { blockNum uint64 @@ -32,9 +39,10 @@ func TestInserts(t *testing.T) { } tests := []struct { - name string - events []event - expectSQL []string + name string + events []event + expectSQL []string + queryResponses []*sql.Rows }{ { name: "insert final block", @@ -47,7 +55,7 @@ func TestInserts(t *testing.T) { }, expectSQL: []string{ `INSERT INTO "testschema"."xfer" ("from","id","to") VALUES ('sender1','1234','receiver1');`, - pruneBelow(10), + `DELETE FROM "testschema"."history" WHERE block_num <= 10;`, `UPDATE "testschema"."cursors" set cursor = 'bN7dsAhRyo44yl_ykkjA36WwLpc_DFtvXwrlIBBBj4r2', block_num = 10, block_id = '10' WHERE id = '756e75736564';`, `COMMIT`, }, @@ -68,15 +76,95 @@ func TestInserts(t *testing.T) { }, expectSQL: []string{ `INSERT INTO "testschema"."xfer" ("from","id","to") VALUES ('sender1','1234','receiver1');`, - pruneBelow(10), + `DELETE FROM "testschema"."history" WHERE block_num <= 10;`, `UPDATE "testschema"."cursors" set cursor = 'bN7dsAhRyo44yl_ykkjA36WwLpc_DFtvXwrlIBBBj4r2', block_num = 10, block_id = '10' WHERE id = '756e75736564';`, `COMMIT`, `INSERT INTO "testschema"."xfer" ("from","id","to") VALUES ('sender2','2345','receiver2');`, - pruneBelow(11), + `DELETE FROM "testschema"."history" WHERE block_num <= 11;`, `UPDATE "testschema"."cursors" set cursor = 'dR5-m-1v1TQvlVRfIM9SXaWwLpc_DFtuXwrkIBBAj4r3', block_num = 11, block_id = '11' WHERE id = '756e75736564';`, `COMMIT`, }, }, + { + name: "insert a reversible blocks", + events: []event{ + { + blockNum: 10, + libNum: 5, + tableChanges: []*pbdatabase.TableChange{insertRowSinglePK("xfer", "1234", "from", "sender1", "to", "receiver1")}, + }, + }, + expectSQL: []string{ + `INSERT INTO "testschema"."history" (op,table_name,pk,block_num) values ('I','"testschema"."xfer"','{"id":"1234"}',10);` + + `INSERT INTO "testschema"."xfer" ("from","id","to") VALUES ('sender1','1234','receiver1');`, + `DELETE FROM "testschema"."history" 
WHERE block_num <= 5;`, + `UPDATE "testschema"."cursors" set cursor = 'i4tY9gOcWnhKoGjRCl2VUKWwLpcyB1plVAvvLxtE', block_num = 10, block_id = '10' WHERE id = '756e75736564';`, + `COMMIT`, + }, + }, + { + name: "insert, then update", + events: []event{ + { + blockNum: 10, + libNum: 5, + tableChanges: []*pbdatabase.TableChange{insertRowMultiplePK("xfer", map[string]string{"id": "1234", "idx": "3"}, "from", "sender1", "to", "receiver1")}, + }, + { + blockNum: 11, + libNum: 6, + tableChanges: []*pbdatabase.TableChange{ + updateRowMultiplePK("xfer", map[string]string{"id": "2345", "idx": "3"}, "from", "sender2", "to", "receiver2"), + }, + }, + }, + expectSQL: []string{ + `INSERT INTO "testschema"."history" (op,table_name,pk,block_num) values ('I','"testschema"."xfer"','{"id":"1234","idx":"3"}',10);` + + `INSERT INTO "testschema"."xfer" ("from","id","to") VALUES ('sender1','1234','receiver1');`, + `DELETE FROM "testschema"."history" WHERE block_num <= 5;`, + `UPDATE "testschema"."cursors" set cursor = 'i4tY9gOcWnhKoGjRCl2VUKWwLpcyB1plVAvvLxtE', block_num = 10, block_id = '10' WHERE id = '756e75736564';`, + `COMMIT`, + `INSERT INTO "testschema"."history" (op,table_name,pk,prev_value,block_num) SELECT 'U','"testschema"."xfer"','{"id":"2345","idx":"3"}',row_to_json("testschema"."xfer"),11 FROM "testschema"."xfer" WHERE "id" = '2345' AND "idx" = '3';` + + `UPDATE "testschema"."xfer" SET "from"='sender2', "to"='receiver2' WHERE "id" = '2345' AND "idx" = '3'`, + `DELETE FROM "testschema"."history" WHERE block_num <= 6;`, + `UPDATE "testschema"."cursors" set cursor = 'LamYQ1PoEJyzLTRd7kdEiKWwLpcyB1tlVArvLBtH', block_num = 11, block_id = '11' WHERE id = '756e75736564';`, + `COMMIT`, + }, + }, + + { + name: "insert, then update, then delete (update disappears)", + events: []event{ + { + blockNum: 10, + libNum: 5, + tableChanges: []*pbdatabase.TableChange{insertRowMultiplePK("xfer", map[string]string{"id": "1234", "idx": "3"}, "from", "sender1", "to", "receiver1")}, + }, + { + 
blockNum: 11, + libNum: 6, + tableChanges: []*pbdatabase.TableChange{ + updateRowMultiplePK("xfer", map[string]string{"id": "2345", "idx": "3"}, "from", "sender2", "to", "receiver2"), + deleteRowMultiplePK("xfer", map[string]string{"id": "2345", "idx": "3"}), + }, + }, + }, + expectSQL: []string{ + `INSERT INTO "testschema"."history" (op,table_name,pk,block_num) values ('I','"testschema"."xfer"','{"id":"1234","idx":"3"}',10);` + + `INSERT INTO "testschema"."xfer" ("from","id","to") VALUES ('sender1','1234','receiver1');`, + `DELETE FROM "testschema"."history" WHERE block_num <= 5;`, + `UPDATE "testschema"."cursors" set cursor = 'i4tY9gOcWnhKoGjRCl2VUKWwLpcyB1plVAvvLxtE', block_num = 10, block_id = '10' WHERE id = '756e75736564';`, + `COMMIT`, + //`INSERT INTO "testschema"."history" (op,table_name,pk,prev_value,block_num) SELECT 'U','"testschema"."xfer"','{"id":"2345","idx":"3"}',row_to_json("testschema"."xfer"),11 FROM "testschema"."xfer" WHERE "id" = '2345' AND "idx" = '3';` + + // `UPDATE "testschema"."xfer" SET "from"='sender2', "to"='receiver2' WHERE "id" = '2345' AND "idx" = '3'`, + `INSERT INTO "testschema"."history" (op,table_name,pk,prev_value,block_num) SELECT 'D','"testschema"."xfer"','{"id":"2345","idx":"3"}',row_to_json("testschema"."xfer"),11 FROM "testschema"."xfer" WHERE "id" = '2345' AND "idx" = '3';` + + `DELETE FROM "testschema"."xfer" WHERE "id" = '2345' AND "idx" = '3'`, + `DELETE FROM "testschema"."history" WHERE block_num <= 6;`, + `UPDATE "testschema"."cursors" set cursor = 'LamYQ1PoEJyzLTRd7kdEiKWwLpcyB1tlVArvLBtH', block_num = 11, block_id = '11' WHERE id = '756e75736564';`, + `COMMIT`, + }, + }, + { name: "insert two reversible blocks, then UNDO last", events: []event{ @@ -97,20 +185,21 @@ func TestInserts(t *testing.T) { }, }, expectSQL: []string{ - `INSERT INTO "testschema"."inserts_history" (table_name, id, block_num) values ('"testschema"."xfer"', '{"id":"1234"}', 10);` + + `INSERT INTO "testschema"."history" 
(op,table_name,pk,block_num) values ('I','"testschema"."xfer"','{"id":"1234"}',10);` + `INSERT INTO "testschema"."xfer" ("from","id","to") VALUES ('sender1','1234','receiver1');`, - pruneBelow(5), + `DELETE FROM "testschema"."history" WHERE block_num <= 5;`, `UPDATE "testschema"."cursors" set cursor = 'i4tY9gOcWnhKoGjRCl2VUKWwLpcyB1plVAvvLxtE', block_num = 10, block_id = '10' WHERE id = '756e75736564';`, `COMMIT`, - `INSERT INTO "testschema"."inserts_history" (table_name, id, block_num) values ('"testschema"."xfer"', '{"id":"2345"}', 11);` + + `INSERT INTO "testschema"."history" (op,table_name,pk,block_num) values ('I','"testschema"."xfer"','{"id":"2345"}',11);` + `INSERT INTO "testschema"."xfer" ("from","id","to") VALUES ('sender2','2345','receiver2');`, - pruneBelow(5), + `DELETE FROM "testschema"."history" WHERE block_num <= 5;`, `UPDATE "testschema"."cursors" set cursor = 'Euaqz6R-ylLG0gbdej7Me6WwLpcyB1tlVArvLxtE', block_num = 11, block_id = '11' WHERE id = '756e75736564';`, `COMMIT`, - // UNDO above block 10 - `DELETE FROM "testschema"."xfer" WHERE "id" = "2345";`, + `SELECT (op,table_name,pk,prev_value,block_num) FROM "testschema"."history" WHERE "block_num" > 10 ORDER BY "block_num" DESC`, + + //`DELETE FROM "testschema"."xfer" WHERE "id" = "2345";`, // this mechanism is tested in db.revertOp + `DELETE FROM "testschema"."history" WHERE "block_num" > 10;`, `UPDATE "testschema"."cursors" set cursor = 'i4tY9gOcWnhKoGjRCl2VUKWwLpcyB1plVAvvLxtE', block_num = 10, block_id = '10' WHERE id = '756e75736564';`, - pruneAbove(10), `COMMIT`, }, }, @@ -203,12 +292,48 @@ func insertRowSinglePK(table string, pk string, fieldsAndValues ...string) *pbda PrimaryKey: &pbdatabase.TableChange_Pk{ Pk: pk, }, - Ordinal: 0, Operation: pbdatabase.TableChange_CREATE, Fields: getFields(fieldsAndValues...), } } +func insertRowMultiplePK(table string, pk map[string]string, fieldsAndValues ...string) *pbdatabase.TableChange { + return &pbdatabase.TableChange{ + Table: table, + PrimaryKey: 
&pbdatabase.TableChange_CompositePk{ + CompositePk: &pbdatabase.CompositePrimaryKey{ + Keys: pk, + }, + }, + Operation: pbdatabase.TableChange_CREATE, + Fields: getFields(fieldsAndValues...), + } +} + +func updateRowMultiplePK(table string, pk map[string]string, fieldsAndValues ...string) *pbdatabase.TableChange { + return &pbdatabase.TableChange{ + Table: table, + PrimaryKey: &pbdatabase.TableChange_CompositePk{ + CompositePk: &pbdatabase.CompositePrimaryKey{ + Keys: pk, + }, + }, + Operation: pbdatabase.TableChange_UPDATE, + Fields: getFields(fieldsAndValues...), + } +} +func deleteRowMultiplePK(table string, pk map[string]string) *pbdatabase.TableChange { + return &pbdatabase.TableChange{ + Table: table, + PrimaryKey: &pbdatabase.TableChange_CompositePk{ + CompositePk: &pbdatabase.CompositePrimaryKey{ + Keys: pk, + }, + }, + Operation: pbdatabase.TableChange_DELETE, + } +} + func blockScopedData(module string, changes []*pbdatabase.TableChange, blockNum uint64, finalBlockNum uint64) *pbsubstreamsrpc.BlockScopedData { mapOutput, err := anypb.New(&pbdatabase.DatabaseChanges{ TableChanges: changes, From 90b1807a1c5271a903b770956230e220fc8c6f2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Duchesneau?= Date: Mon, 6 Nov 2023 09:37:07 -0500 Subject: [PATCH 05/10] enable reorg handling by default, add sanity check for history table, prepare release v4.0.0-beta --- CHANGELOG.md | 15 +++++ cmd/substreams-sink-sql/common_flags.go | 14 ++-- cmd/substreams-sink-sql/generate_csv.go | 2 +- cmd/substreams-sink-sql/run.go | 14 +++- cmd/substreams-sink-sql/setup.go | 4 +- cmd/substreams-sink-sql/tools.go | 21 +++--- db/db.go | 89 +++++++++++++------------ db/dialect_clickhouse.go | 21 +----- db/dialect_postgres.go | 40 ++++++----- db/dialect_postgres_test.go | 12 ++-- db/operations_test.go | 4 +- db/ops.go | 15 +++-- db/ops_test.go | 65 ++++++++++++++++++ db/testing.go | 6 +- db/types.go | 27 ++++---- devel/eth-block-meta/schema.sql | 9 --- sinker/sinker_test.go | 40 
+++++------ 17 files changed, 235 insertions(+), 163 deletions(-) create mode 100644 db/ops_test.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 882cd30..8d36213 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,21 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v4.0.0-beta + +### Highlights + +* This release brings support for managing reorgs in Postgres database, enabled by default when `--undo-buffer-size` to 0. + +### Breaking changes + +* A change in your SQL schema may be required to keep existing substreams:SQL integrations working: + * The presence of a primary key (single key or composite) is now *MANDATORY* on every table. + * The `sf.substreams.sink.database.v1.TableChange` message, generated inside substreams, must now exactly match its primary key with the one in the SQL schema. + * You will need to re-run `setup` on your existing PostgreSQL databases to add the `substreams_history` table. 
+ +* Since reorgs management is not yet supported on Clickhouse, users will have to set `--undo-buffer-size` to a non-zero value (`12` was the previous default) + ## Protodefs v1.0.4 * Added support for `rest_frontend` field with `enabled` boolean flag, aimed at this backend implementation: https://github.com/semiotic-ai/sql-wrapper diff --git a/cmd/substreams-sink-sql/common_flags.go b/cmd/substreams-sink-sql/common_flags.go index 19e490c..7b6e3b1 100644 --- a/cmd/substreams-sink-sql/common_flags.go +++ b/cmd/substreams-sink-sql/common_flags.go @@ -58,24 +58,22 @@ func newDBLoader( cmd *cobra.Command, psqlDSN string, flushInterval time.Duration, + handleReorgs bool, ) (*db.Loader, error) { moduleMismatchMode, err := db.ParseOnModuleHashMismatch(sflags.MustGetString(cmd, onModuleHashMistmatchFlag)) cli.NoError(err, "invalid mistmatch mode") - dbLoader, err := db.NewLoader(psqlDSN, flushInterval, moduleMismatchMode, zlog, tracer) + dbLoader, err := db.NewLoader(psqlDSN, flushInterval, moduleMismatchMode, handleReorgs, zlog, tracer) if err != nil { return nil, fmt.Errorf("new psql loader: %w", err) } if err := dbLoader.LoadTables(); err != nil { - var e *db.CursorError + var e *db.SystemTableError if errors.As(err, &e) { - fmt.Printf("Error validating the cursors table: %s\n", e) - fmt.Println("You can use the following sql schema to create a cursors table") - fmt.Println() - fmt.Println(dbLoader.GetCreateCursorsTableSQL(false)) - fmt.Println() - return nil, fmt.Errorf("invalid cursors table") + fmt.Printf("Error validating the system table: %s\n", e) + fmt.Println("Did you run setup ?") + return nil, e } return nil, fmt.Errorf("load psql table: %w", err) diff --git a/cmd/substreams-sink-sql/generate_csv.go b/cmd/substreams-sink-sql/generate_csv.go index 963b9af..8aec4a4 100644 --- a/cmd/substreams-sink-sql/generate_csv.go +++ b/cmd/substreams-sink-sql/generate_csv.go @@ -100,7 +100,7 @@ func generateCsvE(cmd *cobra.Command, args []string) error { return 
fmt.Errorf("new base sinker: %w", err) } - dbLoader, err := newDBLoader(cmd, dsn, 0) // flush interval not used in CSV mode + dbLoader, err := newDBLoader(cmd, dsn, 0, false) // flush interval not used in CSV mode if err != nil { return fmt.Errorf("new db loader: %w", err) } diff --git a/cmd/substreams-sink-sql/run.go b/cmd/substreams-sink-sql/run.go index e79cf78..701985f 100644 --- a/cmd/substreams-sink-sql/run.go +++ b/cmd/substreams-sink-sql/run.go @@ -13,14 +13,21 @@ import ( "github.com/streamingfast/substreams/manifest" ) +type ignoreUndoBufferSize struct{} + +func (i ignoreUndoBufferSize) IsIgnored(in string) bool { + return in == "undo-buffer-size" +} + var sinkRunCmd = Command(sinkRunE, "run [:]", "Runs SQL sink process", RangeArgs(2, 3), Flags(func(flags *pflag.FlagSet) { - sink.AddFlagsToSet(flags) + sink.AddFlagsToSet(flags, ignoreUndoBufferSize{}) AddCommonSinkerFlags(flags) + flags.Int("undo-buffer-size", 0, "If non-zero, handling of reorgs in the database is disabled. Instead, a buffer is introduced to only process a blocks once it has been confirmed by that many blocks, introducing a latency but slightly reducing the load on the database when close to head.") flags.Int("flush-interval", 1000, "When in catch up mode, flush every N blocks") flags.StringP("endpoint", "e", "", "Specify the substreams endpoint, ex: `mainnet.eth.streamingfast.io:443`") }), @@ -54,7 +61,8 @@ func sinkRunE(cmd *cobra.Command, args []string) error { return err } - // "github.com/streamingfast/substreams/manifest" + handleReorgs := sflags.MustGetInt(cmd, "undo-buffer-size") == 0 + sink, err := sink.NewFromViper( cmd, supportedOutputTypes, @@ -69,7 +77,7 @@ func sinkRunE(cmd *cobra.Command, args []string) error { return fmt.Errorf("new base sinker: %w", err) } - dbLoader, err := newDBLoader(cmd, dsn, sflags.MustGetDuration(cmd, "flush-interval")) + dbLoader, err := newDBLoader(cmd, dsn, sflags.MustGetDuration(cmd, "flush-interval"), handleReorgs) if err != nil { return 
fmt.Errorf("new db loader: %w", err) } diff --git a/cmd/substreams-sink-sql/setup.go b/cmd/substreams-sink-sql/setup.go index 9a1f8d2..f779562 100644 --- a/cmd/substreams-sink-sql/setup.go +++ b/cmd/substreams-sink-sql/setup.go @@ -44,12 +44,12 @@ func sinkSetupE(cmd *cobra.Command, args []string) error { return fmt.Errorf("extract sink config: %w", err) } - dbLoader, err := db.NewLoader(dsn, 0, db.OnModuleHashMismatchError, zlog, tracer) + dbLoader, err := db.NewLoader(dsn, 0, db.OnModuleHashMismatchError, true, zlog, tracer) if err != nil { return fmt.Errorf("new psql loader: %w", err) } - err = dbLoader.SetupFromBytes(ctx, []byte(sinkConfig.Schema), sflags.MustGetBool(cmd, "postgraphile")) + err = dbLoader.Setup(ctx, []byte(sinkConfig.Schema), sflags.MustGetBool(cmd, "postgraphile")) if err != nil { if isDuplicateTableError(err) && ignoreDuplicateTableErrors { zlog.Info("received duplicate table error, script dit not executed succesfully completed") diff --git a/cmd/substreams-sink-sql/tools.go b/cmd/substreams-sink-sql/tools.go index ca38bf1..b8d880f 100644 --- a/cmd/substreams-sink-sql/tools.go +++ b/cmd/substreams-sink-sql/tools.go @@ -65,7 +65,7 @@ var sinkToolsCmd = Group( ) func toolsReadCursorE(cmd *cobra.Command, _ []string) error { - loader := toolsCreateLoader(true) + loader := toolsCreateLoader() out, err := loader.GetAllCursors(cmd.Context()) cli.NoError(err, "Unable to get all cursors") @@ -83,7 +83,7 @@ func toolsReadCursorE(cmd *cobra.Command, _ []string) error { } func toolsWriteCursorE(cmd *cobra.Command, args []string) error { - loader := toolsCreateLoader(true) + loader := toolsCreateLoader() moduleHash := args[0] opaqueCursor := args[1] @@ -114,7 +114,7 @@ func toolsWriteCursorE(cmd *cobra.Command, args []string) error { } func toolsDeleteCursorE(cmd *cobra.Command, args []string) error { - loader := toolsCreateLoader(true) + loader := toolsCreateLoader() moduleHash := "" if !viper.GetBool("tools-cursor-delete-all") { @@ -143,18 +143,17 @@ 
func toolsDeleteCursorE(cmd *cobra.Command, args []string) error { return nil } -func toolsCreateLoader(enforceCursorTable bool) *db.Loader { +func toolsCreateLoader() *db.Loader { dsn := viper.GetString("tools-global-dsn") - loader, err := db.NewLoader(dsn, 0, db.OnModuleHashMismatchIgnore, zlog, tracer) + loader, err := db.NewLoader(dsn, 0, db.OnModuleHashMismatchIgnore, true, zlog, tracer) cli.NoError(err, "Unable to instantiate database manager from DSN %q", dsn) if err := loader.LoadTables(); err != nil { - var cursorError *db.CursorError - if errors.As(err, &cursorError) { - if enforceCursorTable { - fmt.Println("It seems the 'cursors' table does not exit on this database, unable to retrieve DB loader") - os.Exit(1) - } + var systemTableError *db.SystemTableError + if errors.As(err, &systemTableError) { + fmt.Printf("Error validating the system table: %s\n", systemTableError) + fmt.Println("Did you run setup ?") + os.Exit(1) } cli.NoError(err, "Unable to load table information from database") diff --git a/db/db.go b/db/db.go index c9b38bf..8515382 100644 --- a/db/db.go +++ b/db/db.go @@ -4,7 +4,6 @@ import ( "context" "database/sql" "fmt" - "os" "time" "github.com/jimsmart/schema" @@ -15,6 +14,7 @@ import ( ) const CURSORS_TABLE = "cursors" +const HISTORY_TABLE = "substreams_history" // Make the typing a bit easier type OrderedMap[K comparable, V any] struct { @@ -25,7 +25,7 @@ func NewOrderedMap[K comparable, V any]() *OrderedMap[K, V] { return &OrderedMap[K, V]{OrderedMap: orderedmap.New[K, V]()} } -type CursorError struct { +type SystemTableError struct { error } @@ -39,6 +39,7 @@ type Loader struct { tables map[string]*TableInfo cursorTable *TableInfo + handleReorgs bool flushInterval time.Duration moduleMismatchMode OnModuleHashMismatch @@ -52,6 +53,7 @@ func NewLoader( psqlDsn string, flushInterval time.Duration, moduleMismatchMode OnModuleHashMismatch, + handleReorgs bool, logger *zap.Logger, tracer logging.Tracer, ) (*Loader, error) { @@ -65,15 +67,6 
@@ func NewLoader( return nil, fmt.Errorf("open db connection: %w", err) } - logger.Debug("created new DB loader", - zap.Duration("flush_interval", flushInterval), - zap.String("database", dsn.database), - zap.String("schema", dsn.schema), - zap.String("host", dsn.host), - zap.Int64("port", dsn.port), - zap.Stringer("on_module_hash_mismatch", moduleMismatchMode), - ) - l := &Loader{ DB: db, database: dsn.database, @@ -85,12 +78,28 @@ func NewLoader( logger: logger, tracer: tracer, } - _, err = l.tryDialect() if err != nil { return nil, fmt.Errorf("dialect not found: %s", err) } + if handleReorgs && l.getDialect().OnlyInserts() { + return nil, fmt.Errorf("driver %s does not support reorg handling. You must use set a non-zero undo-buffer-size", dsn.driver) + } + l.handleReorgs = handleReorgs + + logger.Info("created new DB loader", + zap.Duration("flush_interval", flushInterval), + zap.String("driver", dsn.driver), + zap.String("database", dsn.database), + zap.String("schema", dsn.schema), + zap.String("host", dsn.host), + zap.Int64("port", dsn.port), + zap.Stringer("on_module_hash_mismatch", moduleMismatchMode), + zap.Bool("handle_reorgs", l.handleReorgs), + zap.String("dialect", fmt.Sprintf("%t", l.getDialect())), + ) + return l, nil } @@ -123,6 +132,7 @@ func (l *Loader) LoadTables() error { } seenCursorTable := false + seenHistoryTable := false for schemaTableName, columns := range schemaTables { schemaName := schemaTableName[0] tableName := schemaTableName[1] @@ -142,6 +152,9 @@ func (l *Loader) LoadTables() error { seenCursorTable = true } + if tableName == HISTORY_TABLE { + seenHistoryTable = true + } columnByName := make(map[string]*ColumnInfo, len(columns)) for _, f := range columns { @@ -165,8 +178,12 @@ func (l *Loader) LoadTables() error { } if !seenCursorTable { - return &CursorError{fmt.Errorf(`%s.%s table is not found`, EscapeIdentifier(l.schema), CURSORS_TABLE)} + return &SystemTableError{fmt.Errorf(`%s.%s table is not found`, 
EscapeIdentifier(l.schema), CURSORS_TABLE)} } + if l.handleReorgs && !seenHistoryTable { + return &SystemTableError{fmt.Errorf("%s.%s table is not found and reorgs handling is enabled.", EscapeIdentifier(l.schema), HISTORY_TABLE)} + } + l.cursorTable = l.tables[CURSORS_TABLE] return nil @@ -174,7 +191,7 @@ func (l *Loader) LoadTables() error { func (l *Loader) validateCursorTables(columns []*sql.ColumnType) (err error) { if len(columns) != 4 { - return &CursorError{fmt.Errorf("table requires 4 columns ('id', 'cursor', 'block_num', 'block_id')")} + return &SystemTableError{fmt.Errorf("table requires 4 columns ('id', 'cursor', 'block_num', 'block_id')")} } columnsCheck := map[string]string{ "block_num": "int64", @@ -185,29 +202,29 @@ func (l *Loader) validateCursorTables(columns []*sql.ColumnType) (err error) { for _, f := range columns { columnName := f.Name() if _, found := columnsCheck[columnName]; !found { - return &CursorError{fmt.Errorf("unexpected column %q in cursors table", columnName)} + return &SystemTableError{fmt.Errorf("unexpected column %q in cursors table", columnName)} } expectedType := columnsCheck[columnName] actualType := f.ScanType().Kind().String() if expectedType != actualType { - return &CursorError{fmt.Errorf("column %q has invalid type, expected %q has %q", columnName, expectedType, actualType)} + return &SystemTableError{fmt.Errorf("column %q has invalid type, expected %q has %q", columnName, expectedType, actualType)} } delete(columnsCheck, columnName) } if len(columnsCheck) != 0 { for k := range columnsCheck { - return &CursorError{fmt.Errorf("missing column %q from cursors", k)} + return &SystemTableError{fmt.Errorf("missing column %q from cursors", k)} } } key, err := schema.PrimaryKey(l.DB, l.schema, CURSORS_TABLE) if err != nil { - return &CursorError{fmt.Errorf("failed getting primary key: %w", err)} + return &SystemTableError{fmt.Errorf("failed getting primary key: %w", err)} } if len(key) == 0 { - return 
&CursorError{fmt.Errorf("primary key not found: %w", err)} + return &SystemTableError{fmt.Errorf("primary key not found: %w", err)} } if key[0] != "id" { - return &CursorError{fmt.Errorf("column 'id' should be primary key not %q", key[0])} + return &SystemTableError{fmt.Errorf("column 'id' should be primary key not %q", key[0])} } return nil } @@ -258,20 +275,9 @@ func (l *Loader) MarshalLogObject(encoder zapcore.ObjectEncoder) error { return nil } -// Setup creates the schema and the cursors table where the is a local file -// on disk. -func (l *Loader) Setup(ctx context.Context, schemaFile string, withPostgraphile bool) error { - b, err := os.ReadFile(schemaFile) - if err != nil { - return fmt.Errorf("read schema file: %w", err) - } - - return l.SetupFromBytes(ctx, b, withPostgraphile) -} - -// SetupFromBytes creates the schema and the cursors table where the is a byte array +// Setup creates the schema, cursors and history table where the is a byte array // taken from somewhere. -func (l *Loader) SetupFromBytes(ctx context.Context, schemaBytes []byte, withPostgraphile bool) error { +func (l *Loader) Setup(ctx context.Context, schemaBytes []byte, withPostgraphile bool) error { schemaSql := string(schemaBytes) if err := l.getDialect().ExecuteSetupScript(ctx, l, schemaSql); err != nil { return fmt.Errorf("exec schema: %w", err) @@ -289,23 +295,20 @@ func (l *Loader) SetupFromBytes(ctx context.Context, schemaBytes []byte, withPos } func (l *Loader) setupCursorTable(ctx context.Context, withPostgraphile bool) error { - _, err := l.ExecContext(ctx, l.GetCreateCursorsTableSQL(withPostgraphile)) + query := l.getDialect().GetCreateCursorQuery(l.schema, withPostgraphile) + _, err := l.ExecContext(ctx, query) return err } func (l *Loader) setupHistoryTable(ctx context.Context) error { - _, err := l.ExecContext(ctx, l.GetCreateHistoryTableSQL()) + if l.getDialect().OnlyInserts() { + return nil + } + query := l.getDialect().GetCreateHistoryQuery(l.schema) + _, err := 
l.ExecContext(ctx, query) return err } -func (l *Loader) GetCreateCursorsTableSQL(withPostgraphile bool) string { - return l.getDialect().GetCreateCursorQuery(l.schema, withPostgraphile) -} - -func (l *Loader) GetCreateHistoryTableSQL() string { - return l.getDialect().GetCreateHistoryQuery(l.schema) -} - func (l *Loader) getDialect() dialect { d, _ := l.tryDialect() return d diff --git a/db/dialect_clickhouse.go b/db/dialect_clickhouse.go index 1b0daa0..f8c62b3 100644 --- a/db/dialect_clickhouse.go +++ b/db/dialect_clickhouse.go @@ -76,14 +76,12 @@ func (d clickhouseDialect) Flush(tx Tx, ctx context.Context, l *Loader, outputMo } entryCount += entries.Len() } - // TODO: implement pruning return entryCount, nil } func (d clickhouseDialect) Revert(tx Tx, ctx context.Context, l *Loader, lastValidFinalBlock uint64) error { - // TODO implement revert - return nil + return fmt.Errorf("clickhouse driver does not support reorg management.") } func (d clickhouseDialect) GetCreateCursorQuery(schema string, withPostgraphile bool) string { @@ -96,24 +94,11 @@ func (d clickhouseDialect) GetCreateCursorQuery(schema string, withPostgraphile block_num Int64, block_id String ) Engine = ReplacingMergeTree() ORDER BY id; - `), EscapeIdentifier(schema), EscapeIdentifier("cursors")) + `), EscapeIdentifier(schema), EscapeIdentifier(CURSORS_TABLE)) } func (d clickhouseDialect) GetCreateHistoryQuery(schema string) string { - out := fmt.Sprintf(cli.Dedent(` - create table if not exists %s.%s - ( - id SERIAL PRIMARY KEY, - op char, - table_name text, - pk text, - prev_value text, - block_num bigint - ) Engine = ReplacingMergeTree() ORDER BY block_num; - `), - EscapeIdentifier(schema), EscapeIdentifier("history"), - ) - return out + panic("clickhouse does not support reorg management") } func (d clickhouseDialect) ExecuteSetupScript(ctx context.Context, l *Loader, schemaSql string) error { diff --git a/db/dialect_postgres.go b/db/dialect_postgres.go index def17ed..7d6e162 100644 --- 
a/db/dialect_postgres.go +++ b/db/dialect_postgres.go @@ -2,6 +2,7 @@ package db import ( "context" + "database/sql" "encoding/json" "fmt" "reflect" @@ -19,7 +20,7 @@ import ( type postgresDialect struct{} func (d postgresDialect) Revert(tx Tx, ctx context.Context, l *Loader, lastValidFinalBlock uint64) error { - query := fmt.Sprintf(`SELECT (op,table_name,pk,prev_value,block_num) FROM %s WHERE "block_num" > %d ORDER BY "block_num" DESC`, + query := fmt.Sprintf(`SELECT op,table_name,pk,prev_value,block_num FROM %s WHERE "block_num" > %d ORDER BY "block_num" DESC`, d.historyTable(l.schema), lastValidFinalBlock, ) @@ -29,22 +30,26 @@ func (d postgresDialect) Revert(tx Tx, ctx context.Context, l *Loader, lastValid return err } + l.logger.Info("reverting forked block block(s)", zap.Uint64("last_valid_final_block", lastValidFinalBlock)) if rows != nil { // rows will be nil with no error only in testing scenarios defer rows.Close() for rows.Next() { var op string var table_name string var pk string - var prev_value string + var prev_value_nullable sql.NullString var block_num uint64 - if err := rows.Scan(&op, &table_name, &pk, &prev_value, &block_num); err != nil { + if err := rows.Scan(&op, &table_name, &pk, &prev_value_nullable, &block_num); err != nil { return err } - if err := d.revertOp(tx, ctx, op, l.schema, table_name, pk, prev_value, block_num); err != nil { + l.logger.Debug("reverting", zap.String("operation", op), zap.String("table_name", table_name), zap.String("pk", pk), zap.Uint64("block_num", block_num)) + prev_value := prev_value_nullable.String + + if err := d.revertOp(tx, ctx, op, table_name, pk, prev_value, block_num); err != nil { return err } } - if rows.Err() != nil { + if err := rows.Err(); err != nil { return err } } @@ -92,7 +97,7 @@ func (d postgresDialect) Flush(tx Tx, ctx context.Context, l *Loader, outputModu return rowCount, nil } -func (d postgresDialect) revertOp(tx Tx, ctx context.Context, op, schema, table_name, pk, prev_value string, 
block_num uint64) error { +func (d postgresDialect) revertOp(tx Tx, ctx context.Context, op, escaped_table_name, pk, prev_value string, block_num uint64) error { pkmap := make(map[string]string) if err := json.Unmarshal([]byte(pk), &pkmap); err != nil { @@ -100,18 +105,17 @@ func (d postgresDialect) revertOp(tx Tx, ctx context.Context, op, schema, table_ } switch op { case "I": - query := fmt.Sprintf(`DELETE FROM %s.%s WHERE %s;`, - EscapeIdentifier(schema), - EscapeIdentifier(table_name), + query := fmt.Sprintf(`DELETE FROM %s WHERE %s;`, + escaped_table_name, getPrimaryKeyWhereClause(pkmap), ) if _, err := tx.ExecContext(ctx, query); err != nil { return fmt.Errorf("executing revert query %q: %w", query, err) } case "D": - query := fmt.Sprintf(`INSERT INTO %s.%s SELECT * FROM json_populate_record(null:%s.%s,%s);`, - EscapeIdentifier(schema), EscapeIdentifier(table_name), - EscapeIdentifier(schema), EscapeIdentifier(table_name), + query := fmt.Sprintf(`INSERT INTO %s SELECT * FROM json_populate_record(null:%s,%s);`, + escaped_table_name, + escaped_table_name, escapeStringValue(prev_value), ) if _, err := tx.ExecContext(ctx, query); err != nil { @@ -124,11 +128,11 @@ func (d postgresDialect) revertOp(tx Tx, ctx context.Context, op, schema, table_ return err } - query := fmt.Sprintf(`UPDATE %s.%s SET(%s)=((SELECT %s FROM json_populate_record(null:%s.%s,%s))) WHERE %s;`, - EscapeIdentifier(schema), EscapeIdentifier(table_name), + query := fmt.Sprintf(`UPDATE %s SET(%s)=((SELECT %s FROM json_populate_record(null:%s,%s))) WHERE %s;`, + escaped_table_name, columns, columns, - EscapeIdentifier(schema), EscapeIdentifier(table_name), + escaped_table_name, escapeStringValue(prev_value), getPrimaryKeyWhereClause(pkmap), ) @@ -174,10 +178,10 @@ func (d postgresDialect) GetCreateCursorQuery(schema string, withPostgraphile bo block_num bigint, block_id text ); - `), EscapeIdentifier(schema), EscapeIdentifier("cursors")) + `), EscapeIdentifier(schema), 
EscapeIdentifier(CURSORS_TABLE)) if withPostgraphile { out += fmt.Sprintf("COMMENT ON TABLE %s.%s IS E'@omit';", - EscapeIdentifier(schema), EscapeIdentifier("cursors")) + EscapeIdentifier(schema), EscapeIdentifier(CURSORS_TABLE)) } return out } @@ -225,7 +229,7 @@ func (d postgresDialect) OnlyInserts() bool { } func (d postgresDialect) historyTable(schema string) string { - return fmt.Sprintf("%s.%s", EscapeIdentifier(schema), EscapeIdentifier("history")) + return fmt.Sprintf("%s.%s", EscapeIdentifier(schema), EscapeIdentifier("substreams_history")) } func (d postgresDialect) saveInsert(schema string, table string, primaryKey map[string]string, blockNum uint64) string { diff --git a/db/dialect_postgres_test.go b/db/dialect_postgres_test.go index 427ecab..2d35620 100644 --- a/db/dialect_postgres_test.go +++ b/db/dialect_postgres_test.go @@ -97,7 +97,6 @@ func TestRevertOp(t *testing.T) { type row struct { op string - schema string table_name string pk string prev_value string @@ -112,8 +111,7 @@ func TestRevertOp(t *testing.T) { name: "rollback insert row", row: row{ op: "I", - schema: "testschema", - table_name: "xfer", + table_name: `"testschema"."xfer"`, pk: `{"id":"2345"}`, prev_value: "", // unused }, @@ -123,8 +121,7 @@ func TestRevertOp(t *testing.T) { name: "rollback delete row", row: row{ op: "D", - schema: "testschema", - table_name: "xfer", + table_name: `"testschema"."xfer"`, pk: `{"id":"2345"}`, prev_value: `{"id":"2345","sender":"0xdead","receiver":"0xbeef"}`, }, @@ -135,8 +132,7 @@ func TestRevertOp(t *testing.T) { name: "rollback update row", row: row{ op: "U", - schema: "testschema", - table_name: "xfer", + table_name: `"testschema"."xfer"`, pk: `{"id":"2345"}`, prev_value: `{"id":"2345","sender":"0xdead","receiver":"0xbeef"}`, }, @@ -151,7 +147,7 @@ func TestRevertOp(t *testing.T) { pd := postgresDialect{} row := test.row - err := pd.revertOp(tx, ctx, row.op, row.schema, row.table_name, row.pk, row.prev_value, 9999) + err := pd.revertOp(tx, ctx, 
row.op, row.table_name, row.pk, row.prev_value, 9999) require.NoError(t, err) assert.Equal(t, []string{test.expect}, tx.Results()) }) diff --git a/db/operations_test.go b/db/operations_test.go index 268df7a..2593527 100644 --- a/db/operations_test.go +++ b/db/operations_test.go @@ -19,7 +19,7 @@ func TestEscapeColumns(t *testing.T) { t.Skip(`PG_DSN not set, please specify PG_DSN to run this test, example: PG_DSN="psql://dev-node:insecure-change-me-in-prod@localhost:5432/dev-node?enable_incremental_sort=off&sslmode=disable"`) } - dbLoader, err := NewLoader(dsn, 0, OnModuleHashMismatchIgnore, zlog, tracer) + dbLoader, err := NewLoader(dsn, 0, OnModuleHashMismatchIgnore, false, zlog, tracer) require.NoError(t, err) tx, err := dbLoader.DB.Begin() @@ -68,7 +68,7 @@ func TestEscapeValues(t *testing.T) { t.Skip(`PG_DSN not set, please specify PG_DSN to run this test, example: PG_DSN="psql://dev-node:insecure-change-me-in-prod@localhost:5432/dev-node?enable_incremental_sort=off&sslmode=disable"`) } - dbLoader, err := NewLoader(dsn, 0, OnModuleHashMismatchIgnore, zlog, tracer) + dbLoader, err := NewLoader(dsn, 0, OnModuleHashMismatchIgnore, false, zlog, tracer) require.NoError(t, err) tx, err := dbLoader.DB.Begin() diff --git a/db/ops.go b/db/ops.go index 5c35619..2922ef0 100644 --- a/db/ops.go +++ b/db/ops.go @@ -81,13 +81,18 @@ func createRowUniqueID(m map[string]string) string { func (l *Loader) GetPrimaryKey(tableName string, pk string) (map[string]string, error) { primaryKeyColumns := l.tables[tableName].primaryColumns - // If there is exactly one primary key column, we assume that we should populate this column with the id of the "primary_key" field. - // If there is no primary key or a composite key, we simply ignore the primary_key input as we don't know where to write it. 
- if len(primaryKeyColumns) != 1 { - return map[string]string{"": pk}, nil + switch len(primaryKeyColumns) { + case 0: + return nil, fmt.Errorf("substreams sent a single primary key, but our sql table has none. This is unsupported.") + case 1: + return map[string]string{primaryKeyColumns[0].name: pk}, nil } - return map[string]string{primaryKeyColumns[0].name: pk}, nil + cols := make([]string, len(primaryKeyColumns)) + for i := range primaryKeyColumns { + cols[i] = primaryKeyColumns[i].name + } + return nil, fmt.Errorf("substreams sent a single primary key, but our sql table has a composite primary key (columns: %s). This is unsupported.", strings.Join(cols, ",")) } // Update a row in the DB, it is assumed the table exists, you can do a diff --git a/db/ops_test.go b/db/ops_test.go new file mode 100644 index 0000000..5c92729 --- /dev/null +++ b/db/ops_test.go @@ -0,0 +1,65 @@ +package db + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestGetPrimaryKey(t *testing.T) { + tests := []struct { + name string + in []*ColumnInfo + expectOut map[string]string + expectError bool + }{ + { + name: "no primkey error", + expectError: true, + }, + { + name: "more than one primkey error", + in: []*ColumnInfo{ + { + name: "one", + }, + { + name: "two", + }, + }, + expectError: true, + }, + { + name: "single than primkey ok", + in: []*ColumnInfo{ + { + name: "id", + }, + }, + expectOut: map[string]string{ + "id": "testval", + }, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + l := &Loader{ + tables: map[string]*TableInfo{ + "test": { + primaryColumns: test.in, + }, + }, + } + out, err := l.GetPrimaryKey("test", "testval") + if test.expectError { + assert.Error(t, err) + } else { + require.NoError(t, err) + assert.Equal(t, test.expectOut, out) + } + + }) + } + +} diff --git a/db/testing.go b/db/testing.go index f2372e3..39300ae 100644 --- a/db/testing.go +++ b/db/testing.go @@ -15,14 
+15,14 @@ func NewTestLoader( tables map[string]*TableInfo, ) (*Loader, *TestTx) { - loader, err := NewLoader("psql://x:5432/x", 0, OnModuleHashMismatchIgnore, zlog, tracer) + loader, err := NewLoader("psql://x:5432/x", 0, OnModuleHashMismatchIgnore, true, zlog, tracer) if err != nil { panic(err) } loader.testTx = &TestTx{} loader.tables = tables loader.schema = schema - loader.cursorTable = tables["cursors"] + loader.cursorTable = tables[CURSORS_TABLE] return loader, loader.testTx } @@ -34,7 +34,7 @@ func TestTables(schema string) map[string]*TableInfo { "from": NewColumnInfo("from", "text", ""), "to": NewColumnInfo("to", "text", ""), }), - "cursors": mustNewTableInfo(schema, "cursors", []string{"id"}, map[string]*ColumnInfo{ + CURSORS_TABLE: mustNewTableInfo(schema, CURSORS_TABLE, []string{"id"}, map[string]*ColumnInfo{ "block_num": NewColumnInfo("id", "int64", ""), "block_id": NewColumnInfo("from", "text", ""), "cursor": NewColumnInfo("cursor", "text", ""), diff --git a/db/types.go b/db/types.go index 42cea85..a05d18c 100644 --- a/db/types.go +++ b/db/types.go @@ -17,11 +17,11 @@ import ( type OnModuleHashMismatch uint type TableInfo struct { - schema string - schemaEscaped string - name string - nameEscaped string - columnsByName map[string]*ColumnInfo + schema string + schemaEscaped string + name string + nameEscaped string + columnsByName map[string]*ColumnInfo primaryColumns []*ColumnInfo // Identifier is equivalent to 'escape().escape()' but pre-computed @@ -34,7 +34,7 @@ func NewTableInfo(schema, name string, pkList []string, columnsByName map[string nameEscaped := EscapeIdentifier(name) primaryColumns := make([]*ColumnInfo, len(pkList)) - for i, primaryKeyColumnName := range(pkList) { + for i, primaryKeyColumnName := range pkList { primaryColumn, found := columnsByName[primaryKeyColumnName] if !found { return nil, fmt.Errorf("primary key column %q not found", primaryKeyColumnName) @@ -42,15 +42,18 @@ func NewTableInfo(schema, name string, pkList []string, 
columnsByName map[string primaryColumns[i] = primaryColumn } + if len(primaryColumns) == 0 { + return nil, fmt.Errorf("sql sink requires a primary key in every table, none was found in table %s.%s", schema, name) + } return &TableInfo{ - schema: schema, - schemaEscaped: schemaEscaped, - name: name, - nameEscaped: nameEscaped, - identifier: schemaEscaped + "." + nameEscaped, + schema: schema, + schemaEscaped: schemaEscaped, + name: name, + nameEscaped: nameEscaped, + identifier: schemaEscaped + "." + nameEscaped, primaryColumns: primaryColumns, - columnsByName: columnsByName, + columnsByName: columnsByName, }, nil } diff --git a/devel/eth-block-meta/schema.sql b/devel/eth-block-meta/schema.sql index 9b67175..0efd184 100644 --- a/devel/eth-block-meta/schema.sql +++ b/devel/eth-block-meta/schema.sql @@ -7,12 +7,3 @@ create table block_meta parent_hash text, timestamp text ); - -create table if not exists "public"."cursors" -( - id text not null constraint cursor_pk primary key, - cursor text, - block_num bigint, - block_id text -); - diff --git a/sinker/sinker_test.go b/sinker/sinker_test.go index 8b17076..4073854 100644 --- a/sinker/sinker_test.go +++ b/sinker/sinker_test.go @@ -55,7 +55,7 @@ func TestInserts(t *testing.T) { }, expectSQL: []string{ `INSERT INTO "testschema"."xfer" ("from","id","to") VALUES ('sender1','1234','receiver1');`, - `DELETE FROM "testschema"."history" WHERE block_num <= 10;`, + `DELETE FROM "testschema"."substreams_history" WHERE block_num <= 10;`, `UPDATE "testschema"."cursors" set cursor = 'bN7dsAhRyo44yl_ykkjA36WwLpc_DFtvXwrlIBBBj4r2', block_num = 10, block_id = '10' WHERE id = '756e75736564';`, `COMMIT`, }, @@ -76,11 +76,11 @@ func TestInserts(t *testing.T) { }, expectSQL: []string{ `INSERT INTO "testschema"."xfer" ("from","id","to") VALUES ('sender1','1234','receiver1');`, - `DELETE FROM "testschema"."history" WHERE block_num <= 10;`, + `DELETE FROM "testschema"."substreams_history" WHERE block_num <= 10;`, `UPDATE 
"testschema"."cursors" set cursor = 'bN7dsAhRyo44yl_ykkjA36WwLpc_DFtvXwrlIBBBj4r2', block_num = 10, block_id = '10' WHERE id = '756e75736564';`, `COMMIT`, `INSERT INTO "testschema"."xfer" ("from","id","to") VALUES ('sender2','2345','receiver2');`, - `DELETE FROM "testschema"."history" WHERE block_num <= 11;`, + `DELETE FROM "testschema"."substreams_history" WHERE block_num <= 11;`, `UPDATE "testschema"."cursors" set cursor = 'dR5-m-1v1TQvlVRfIM9SXaWwLpc_DFtuXwrkIBBAj4r3', block_num = 11, block_id = '11' WHERE id = '756e75736564';`, `COMMIT`, }, @@ -95,9 +95,9 @@ func TestInserts(t *testing.T) { }, }, expectSQL: []string{ - `INSERT INTO "testschema"."history" (op,table_name,pk,block_num) values ('I','"testschema"."xfer"','{"id":"1234"}',10);` + + `INSERT INTO "testschema"."substreams_history" (op,table_name,pk,block_num) values ('I','"testschema"."xfer"','{"id":"1234"}',10);` + `INSERT INTO "testschema"."xfer" ("from","id","to") VALUES ('sender1','1234','receiver1');`, - `DELETE FROM "testschema"."history" WHERE block_num <= 5;`, + `DELETE FROM "testschema"."substreams_history" WHERE block_num <= 5;`, `UPDATE "testschema"."cursors" set cursor = 'i4tY9gOcWnhKoGjRCl2VUKWwLpcyB1plVAvvLxtE', block_num = 10, block_id = '10' WHERE id = '756e75736564';`, `COMMIT`, }, @@ -119,14 +119,14 @@ func TestInserts(t *testing.T) { }, }, expectSQL: []string{ - `INSERT INTO "testschema"."history" (op,table_name,pk,block_num) values ('I','"testschema"."xfer"','{"id":"1234","idx":"3"}',10);` + + `INSERT INTO "testschema"."substreams_history" (op,table_name,pk,block_num) values ('I','"testschema"."xfer"','{"id":"1234","idx":"3"}',10);` + `INSERT INTO "testschema"."xfer" ("from","id","to") VALUES ('sender1','1234','receiver1');`, - `DELETE FROM "testschema"."history" WHERE block_num <= 5;`, + `DELETE FROM "testschema"."substreams_history" WHERE block_num <= 5;`, `UPDATE "testschema"."cursors" set cursor = 'i4tY9gOcWnhKoGjRCl2VUKWwLpcyB1plVAvvLxtE', block_num = 10, block_id = '10' WHERE id 
= '756e75736564';`, `COMMIT`, - `INSERT INTO "testschema"."history" (op,table_name,pk,prev_value,block_num) SELECT 'U','"testschema"."xfer"','{"id":"2345","idx":"3"}',row_to_json("testschema"."xfer"),11 FROM "testschema"."xfer" WHERE "id" = '2345' AND "idx" = '3';` + + `INSERT INTO "testschema"."substreams_history" (op,table_name,pk,prev_value,block_num) SELECT 'U','"testschema"."xfer"','{"id":"2345","idx":"3"}',row_to_json("testschema"."xfer"),11 FROM "testschema"."xfer" WHERE "id" = '2345' AND "idx" = '3';` + `UPDATE "testschema"."xfer" SET "from"='sender2', "to"='receiver2' WHERE "id" = '2345' AND "idx" = '3'`, - `DELETE FROM "testschema"."history" WHERE block_num <= 6;`, + `DELETE FROM "testschema"."substreams_history" WHERE block_num <= 6;`, `UPDATE "testschema"."cursors" set cursor = 'LamYQ1PoEJyzLTRd7kdEiKWwLpcyB1tlVArvLBtH', block_num = 11, block_id = '11' WHERE id = '756e75736564';`, `COMMIT`, }, @@ -150,16 +150,16 @@ func TestInserts(t *testing.T) { }, }, expectSQL: []string{ - `INSERT INTO "testschema"."history" (op,table_name,pk,block_num) values ('I','"testschema"."xfer"','{"id":"1234","idx":"3"}',10);` + + `INSERT INTO "testschema"."substreams_history" (op,table_name,pk,block_num) values ('I','"testschema"."xfer"','{"id":"1234","idx":"3"}',10);` + `INSERT INTO "testschema"."xfer" ("from","id","to") VALUES ('sender1','1234','receiver1');`, - `DELETE FROM "testschema"."history" WHERE block_num <= 5;`, + `DELETE FROM "testschema"."substreams_history" WHERE block_num <= 5;`, `UPDATE "testschema"."cursors" set cursor = 'i4tY9gOcWnhKoGjRCl2VUKWwLpcyB1plVAvvLxtE', block_num = 10, block_id = '10' WHERE id = '756e75736564';`, `COMMIT`, - //`INSERT INTO "testschema"."history" (op,table_name,pk,prev_value,block_num) SELECT 'U','"testschema"."xfer"','{"id":"2345","idx":"3"}',row_to_json("testschema"."xfer"),11 FROM "testschema"."xfer" WHERE "id" = '2345' AND "idx" = '3';` + + //`INSERT INTO "testschema"."substreams_history" (op,table_name,pk,prev_value,block_num) 
SELECT 'U','"testschema"."xfer"','{"id":"2345","idx":"3"}',row_to_json("testschema"."xfer"),11 FROM "testschema"."xfer" WHERE "id" = '2345' AND "idx" = '3';` + // `UPDATE "testschema"."xfer" SET "from"='sender2', "to"='receiver2' WHERE "id" = '2345' AND "idx" = '3'`, - `INSERT INTO "testschema"."history" (op,table_name,pk,prev_value,block_num) SELECT 'D','"testschema"."xfer"','{"id":"2345","idx":"3"}',row_to_json("testschema"."xfer"),11 FROM "testschema"."xfer" WHERE "id" = '2345' AND "idx" = '3';` + + `INSERT INTO "testschema"."substreams_history" (op,table_name,pk,prev_value,block_num) SELECT 'D','"testschema"."xfer"','{"id":"2345","idx":"3"}',row_to_json("testschema"."xfer"),11 FROM "testschema"."xfer" WHERE "id" = '2345' AND "idx" = '3';` + `DELETE FROM "testschema"."xfer" WHERE "id" = '2345' AND "idx" = '3'`, - `DELETE FROM "testschema"."history" WHERE block_num <= 6;`, + `DELETE FROM "testschema"."substreams_history" WHERE block_num <= 6;`, `UPDATE "testschema"."cursors" set cursor = 'LamYQ1PoEJyzLTRd7kdEiKWwLpcyB1tlVArvLBtH', block_num = 11, block_id = '11' WHERE id = '756e75736564';`, `COMMIT`, }, @@ -185,20 +185,20 @@ func TestInserts(t *testing.T) { }, }, expectSQL: []string{ - `INSERT INTO "testschema"."history" (op,table_name,pk,block_num) values ('I','"testschema"."xfer"','{"id":"1234"}',10);` + + `INSERT INTO "testschema"."substreams_history" (op,table_name,pk,block_num) values ('I','"testschema"."xfer"','{"id":"1234"}',10);` + `INSERT INTO "testschema"."xfer" ("from","id","to") VALUES ('sender1','1234','receiver1');`, - `DELETE FROM "testschema"."history" WHERE block_num <= 5;`, + `DELETE FROM "testschema"."substreams_history" WHERE block_num <= 5;`, `UPDATE "testschema"."cursors" set cursor = 'i4tY9gOcWnhKoGjRCl2VUKWwLpcyB1plVAvvLxtE', block_num = 10, block_id = '10' WHERE id = '756e75736564';`, `COMMIT`, - `INSERT INTO "testschema"."history" (op,table_name,pk,block_num) values ('I','"testschema"."xfer"','{"id":"2345"}',11);` + + `INSERT INTO 
"testschema"."substreams_history" (op,table_name,pk,block_num) values ('I','"testschema"."xfer"','{"id":"2345"}',11);` + `INSERT INTO "testschema"."xfer" ("from","id","to") VALUES ('sender2','2345','receiver2');`, - `DELETE FROM "testschema"."history" WHERE block_num <= 5;`, + `DELETE FROM "testschema"."substreams_history" WHERE block_num <= 5;`, `UPDATE "testschema"."cursors" set cursor = 'Euaqz6R-ylLG0gbdej7Me6WwLpcyB1tlVArvLxtE', block_num = 11, block_id = '11' WHERE id = '756e75736564';`, `COMMIT`, - `SELECT (op,table_name,pk,prev_value,block_num) FROM "testschema"."history" WHERE "block_num" > 10 ORDER BY "block_num" DESC`, + `SELECT op,table_name,pk,prev_value,block_num FROM "testschema"."substreams_history" WHERE "block_num" > 10 ORDER BY "block_num" DESC`, //`DELETE FROM "testschema"."xfer" WHERE "id" = "2345";`, // this mechanism is tested in db.revertOp - `DELETE FROM "testschema"."history" WHERE "block_num" > 10;`, + `DELETE FROM "testschema"."substreams_history" WHERE "block_num" > 10;`, `UPDATE "testschema"."cursors" set cursor = 'i4tY9gOcWnhKoGjRCl2VUKWwLpcyB1plVAvvLxtE', block_num = 10, block_id = '10' WHERE id = '756e75736564';`, `COMMIT`, }, From eef621d96a3f5019a13711e9894ca28383ff85d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Duchesneau?= Date: Mon, 6 Nov 2023 13:34:06 -0500 Subject: [PATCH 06/10] fix reorg sql statements for update/delete --- db/dialect_postgres.go | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/db/dialect_postgres.go b/db/dialect_postgres.go index 7d6e162..f0794a8 100644 --- a/db/dialect_postgres.go +++ b/db/dialect_postgres.go @@ -242,19 +242,19 @@ func (d postgresDialect) saveInsert(schema string, table string, primaryKey map[ ) } -func (d postgresDialect) saveUpdate(schema string, escapedTable string, primaryKey map[string]string, blockNum uint64) string { - return d.saveRow("U", schema, escapedTable, primaryKey, blockNum) +func (d postgresDialect) saveUpdate(schema 
string, escapedTableName string, primaryKey map[string]string, blockNum uint64) string { + return d.saveRow("U", schema, escapedTableName, primaryKey, blockNum) } -func (d postgresDialect) saveDelete(schema string, escapedTable string, primaryKey map[string]string, blockNum uint64) string { - return d.saveRow("D", schema, escapedTable, primaryKey, blockNum) +func (d postgresDialect) saveDelete(schema string, escapedTableName string, primaryKey map[string]string, blockNum uint64) string { + return d.saveRow("D", schema, escapedTableName, primaryKey, blockNum) } -func (d postgresDialect) saveRow(op, schema, escapedTable string, primaryKey map[string]string, blockNum uint64) string { - return fmt.Sprintf(`INSERT INTO %s (op,table_name,pk,prev_value,block_num) SELECT %s,%s,%s,row_to_json(%s),%d FROM %s WHERE %s;`, +func (d postgresDialect) saveRow(op, schema, escapedTableName string, primaryKey map[string]string, blockNum uint64) string { + return fmt.Sprintf(`INSERT INTO %s (op,table_name,pk,prev_value,block_num) SELECT %s,%s,%s,row_to_json(%s),%d FROM %s.%s WHERE %s;`, d.historyTable(schema), - escapeStringValue(op), escapeStringValue(escapedTable), escapeStringValue(primaryKeyToJSON(primaryKey)), escapedTable, blockNum, - escapedTable, + escapeStringValue(op), escapeStringValue(escapedTableName), escapeStringValue(primaryKeyToJSON(primaryKey)), escapedTableName, blockNum, + EscapeIdentifier(schema), escapedTableName, getPrimaryKeyWhereClause(primaryKey), ) @@ -304,7 +304,7 @@ func (d *postgresDialect) prepareStatement(schema string, o *Operation) (string, ) if o.reversibleBlockNum != nil { - return d.saveUpdate(schema, o.table.identifier, o.primaryKey, *o.reversibleBlockNum) + updateQuery, nil + return d.saveUpdate(schema, o.table.nameEscaped, o.primaryKey, *o.reversibleBlockNum) + updateQuery, nil } return updateQuery, nil @@ -315,7 +315,7 @@ func (d *postgresDialect) prepareStatement(schema string, o *Operation) (string, primaryKeyWhereClause, ) if 
o.reversibleBlockNum != nil { - return d.saveDelete(schema, o.table.identifier, o.primaryKey, *o.reversibleBlockNum) + deleteQuery, nil + return d.saveDelete(schema, o.table.nameEscaped, o.primaryKey, *o.reversibleBlockNum) + deleteQuery, nil } return deleteQuery, nil From 72dcd2594754b9f4685a66053dec0f0590fc8f64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Duchesneau?= Date: Mon, 6 Nov 2023 13:51:48 -0500 Subject: [PATCH 07/10] fix update query missing double :: on postgres instruction --- db/dialect_postgres.go | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/db/dialect_postgres.go b/db/dialect_postgres.go index f0794a8..a2a2331 100644 --- a/db/dialect_postgres.go +++ b/db/dialect_postgres.go @@ -40,17 +40,17 @@ func (d postgresDialect) Revert(tx Tx, ctx context.Context, l *Loader, lastValid var prev_value_nullable sql.NullString var block_num uint64 if err := rows.Scan(&op, &table_name, &pk, &prev_value_nullable, &block_num); err != nil { - return err + return fmt.Errorf("scanning row: %w", err) } l.logger.Debug("reverting", zap.String("operation", op), zap.String("table_name", table_name), zap.String("pk", pk), zap.Uint64("block_num", block_num)) prev_value := prev_value_nullable.String if err := d.revertOp(tx, ctx, op, table_name, pk, prev_value, block_num); err != nil { - return err + return fmt.Errorf("revertOp: %w", err) } } if err := rows.Err(); err != nil { - return err + return fmt.Errorf("iterating on rows from query %q: %w", query, err) } } pruneHistory := fmt.Sprintf(`DELETE FROM %s WHERE "block_num" > %d;`, @@ -59,7 +59,10 @@ func (d postgresDialect) Revert(tx Tx, ctx context.Context, l *Loader, lastValid ) _, err = tx.ExecContext(ctx, pruneHistory) - return err + if err != nil { + return fmt.Errorf("executing pruneHistory: %w", err) + } + return nil } func (d postgresDialect) Flush(tx Tx, ctx context.Context, l *Loader, outputModuleHash string, lastFinalBlock uint64) (int, error) { @@ -101,7 +104,7 
@@ func (d postgresDialect) revertOp(tx Tx, ctx context.Context, op, escaped_table_ pkmap := make(map[string]string) if err := json.Unmarshal([]byte(pk), &pkmap); err != nil { - return err + return fmt.Errorf("revertOp: unmarshalling %q: %w", pk, err) } switch op { case "I": @@ -113,7 +116,7 @@ func (d postgresDialect) revertOp(tx Tx, ctx context.Context, op, escaped_table_ return fmt.Errorf("executing revert query %q: %w", query, err) } case "D": - query := fmt.Sprintf(`INSERT INTO %s SELECT * FROM json_populate_record(null:%s,%s);`, + query := fmt.Sprintf(`INSERT INTO %s SELECT * FROM json_populate_record(null::%s,%s);`, escaped_table_name, escaped_table_name, escapeStringValue(prev_value), @@ -128,7 +131,7 @@ func (d postgresDialect) revertOp(tx Tx, ctx context.Context, op, escaped_table_ return err } - query := fmt.Sprintf(`UPDATE %s SET(%s)=((SELECT %s FROM json_populate_record(null:%s,%s))) WHERE %s;`, + query := fmt.Sprintf(`UPDATE %s SET(%s)=((SELECT %s FROM json_populate_record(null::%s,%s))) WHERE %s;`, escaped_table_name, columns, columns, @@ -146,9 +149,9 @@ func (d postgresDialect) revertOp(tx Tx, ctx context.Context, op, escaped_table_ } func sqlColumnNamesFromJSON(in string) (string, error) { - valueMap := make(map[string]string) + valueMap := make(map[string]interface{}) if err := json.Unmarshal([]byte(in), &valueMap); err != nil { - return "", err + return "", fmt.Errorf("unmarshalling %q into valueMap: %w", in, err) } escapedNames := make([]string, len(valueMap)) i := 0 From 35588547fc213a439aec3aa1e91477d47d329f77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Duchesneau?= Date: Mon, 6 Nov 2023 14:00:46 -0500 Subject: [PATCH 08/10] fix reorg postgres tests --- db/dialect_postgres.go | 3 ++- db/dialect_postgres_test.go | 4 ++-- sinker/sinker_test.go | 7 ++++--- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/db/dialect_postgres.go b/db/dialect_postgres.go index a2a2331..842c973 100644 --- a/db/dialect_postgres.go +++ 
b/db/dialect_postgres.go @@ -254,9 +254,10 @@ func (d postgresDialect) saveDelete(schema string, escapedTableName string, prim } func (d postgresDialect) saveRow(op, schema, escapedTableName string, primaryKey map[string]string, blockNum uint64) string { + schemaAndTable := fmt.Sprintf("%s.%s", EscapeIdentifier(schema), escapedTableName) return fmt.Sprintf(`INSERT INTO %s (op,table_name,pk,prev_value,block_num) SELECT %s,%s,%s,row_to_json(%s),%d FROM %s.%s WHERE %s;`, d.historyTable(schema), - escapeStringValue(op), escapeStringValue(escapedTableName), escapeStringValue(primaryKeyToJSON(primaryKey)), escapedTableName, blockNum, + escapeStringValue(op), escapeStringValue(schemaAndTable), escapeStringValue(primaryKeyToJSON(primaryKey)), escapedTableName, blockNum, EscapeIdentifier(schema), escapedTableName, getPrimaryKeyWhereClause(primaryKey), ) diff --git a/db/dialect_postgres_test.go b/db/dialect_postgres_test.go index 2d35620..07a82a1 100644 --- a/db/dialect_postgres_test.go +++ b/db/dialect_postgres_test.go @@ -125,7 +125,7 @@ func TestRevertOp(t *testing.T) { pk: `{"id":"2345"}`, prev_value: `{"id":"2345","sender":"0xdead","receiver":"0xbeef"}`, }, - expect: `INSERT INTO "testschema"."xfer" SELECT * FROM json_populate_record(null:"testschema"."xfer",` + + expect: `INSERT INTO "testschema"."xfer" SELECT * FROM json_populate_record(null::"testschema"."xfer",` + `'{"id":"2345","sender":"0xdead","receiver":"0xbeef"}');`, }, { @@ -136,7 +136,7 @@ func TestRevertOp(t *testing.T) { pk: `{"id":"2345"}`, prev_value: `{"id":"2345","sender":"0xdead","receiver":"0xbeef"}`, }, - expect: `UPDATE "testschema"."xfer" SET("id","receiver","sender")=((SELECT "id","receiver","sender" FROM json_populate_record(null:"testschema"."xfer",` + + expect: `UPDATE "testschema"."xfer" SET("id","receiver","sender")=((SELECT "id","receiver","sender" FROM json_populate_record(null::"testschema"."xfer",` + `'{"id":"2345","sender":"0xdead","receiver":"0xbeef"}'))) WHERE "id" = '2345';`, }, } 
diff --git a/sinker/sinker_test.go b/sinker/sinker_test.go index 4073854..a60459e 100644 --- a/sinker/sinker_test.go +++ b/sinker/sinker_test.go @@ -124,7 +124,7 @@ func TestInserts(t *testing.T) { `DELETE FROM "testschema"."substreams_history" WHERE block_num <= 5;`, `UPDATE "testschema"."cursors" set cursor = 'i4tY9gOcWnhKoGjRCl2VUKWwLpcyB1plVAvvLxtE', block_num = 10, block_id = '10' WHERE id = '756e75736564';`, `COMMIT`, - `INSERT INTO "testschema"."substreams_history" (op,table_name,pk,prev_value,block_num) SELECT 'U','"testschema"."xfer"','{"id":"2345","idx":"3"}',row_to_json("testschema"."xfer"),11 FROM "testschema"."xfer" WHERE "id" = '2345' AND "idx" = '3';` + + `INSERT INTO "testschema"."substreams_history" (op,table_name,pk,prev_value,block_num) SELECT 'U','"testschema"."xfer"','{"id":"2345","idx":"3"}',row_to_json("xfer"),11 FROM "testschema"."xfer" WHERE "id" = '2345' AND "idx" = '3';` + `UPDATE "testschema"."xfer" SET "from"='sender2', "to"='receiver2' WHERE "id" = '2345' AND "idx" = '3'`, `DELETE FROM "testschema"."substreams_history" WHERE block_num <= 6;`, `UPDATE "testschema"."cursors" set cursor = 'LamYQ1PoEJyzLTRd7kdEiKWwLpcyB1tlVArvLBtH', block_num = 11, block_id = '11' WHERE id = '756e75736564';`, @@ -155,9 +155,10 @@ func TestInserts(t *testing.T) { `DELETE FROM "testschema"."substreams_history" WHERE block_num <= 5;`, `UPDATE "testschema"."cursors" set cursor = 'i4tY9gOcWnhKoGjRCl2VUKWwLpcyB1plVAvvLxtE', block_num = 10, block_id = '10' WHERE id = '756e75736564';`, `COMMIT`, - //`INSERT INTO "testschema"."substreams_history" (op,table_name,pk,prev_value,block_num) SELECT 'U','"testschema"."xfer"','{"id":"2345","idx":"3"}',row_to_json("testschema"."xfer"),11 FROM "testschema"."xfer" WHERE "id" = '2345' AND "idx" = '3';` + + // the following gets deduped + //`INSERT INTO "testschema"."substreams_history" (op,table_name,pk,prev_value,block_num) SELECT 'U','"testschema"."xfer"','{"id":"2345","idx":"3"}',row_to_json("xfer"),11 FROM 
"testschema"."xfer" WHERE "id" = '2345' AND "idx" = '3';` + // `UPDATE "testschema"."xfer" SET "from"='sender2', "to"='receiver2' WHERE "id" = '2345' AND "idx" = '3'`, - `INSERT INTO "testschema"."substreams_history" (op,table_name,pk,prev_value,block_num) SELECT 'D','"testschema"."xfer"','{"id":"2345","idx":"3"}',row_to_json("testschema"."xfer"),11 FROM "testschema"."xfer" WHERE "id" = '2345' AND "idx" = '3';` + + `INSERT INTO "testschema"."substreams_history" (op,table_name,pk,prev_value,block_num) SELECT 'D','"testschema"."xfer"','{"id":"2345","idx":"3"}',row_to_json("xfer"),11 FROM "testschema"."xfer" WHERE "id" = '2345' AND "idx" = '3';` + `DELETE FROM "testschema"."xfer" WHERE "id" = '2345' AND "idx" = '3'`, `DELETE FROM "testschema"."substreams_history" WHERE block_num <= 6;`, `UPDATE "testschema"."cursors" set cursor = 'LamYQ1PoEJyzLTRd7kdEiKWwLpcyB1tlVArvLBtH', block_num = 11, block_id = '11' WHERE id = '756e75736564';`, From 28197f7be9cda20f8dca8ebebbdd3615dd93abed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Duchesneau?= Date: Mon, 6 Nov 2023 16:01:35 -0500 Subject: [PATCH 09/10] ensure history table is created with Postgraphile 'omit' annotation if needed --- db/db.go | 6 +++--- db/dialect.go | 2 +- db/dialect_clickhouse.go | 2 +- db/dialect_postgres.go | 6 +++++- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/db/db.go b/db/db.go index 8515382..acc81be 100644 --- a/db/db.go +++ b/db/db.go @@ -287,7 +287,7 @@ func (l *Loader) Setup(ctx context.Context, schemaBytes []byte, withPostgraphile return fmt.Errorf("setup cursor table: %w", err) } - if err := l.setupHistoryTable(ctx); err != nil { + if err := l.setupHistoryTable(ctx, withPostgraphile); err != nil { return fmt.Errorf("setup history table: %w", err) } @@ -300,11 +300,11 @@ func (l *Loader) setupCursorTable(ctx context.Context, withPostgraphile bool) er return err } -func (l *Loader) setupHistoryTable(ctx context.Context) error { +func (l *Loader) setupHistoryTable(ctx 
context.Context, withPostgraphile bool) error { if l.getDialect().OnlyInserts() { return nil } - query := l.getDialect().GetCreateHistoryQuery(l.schema) + query := l.getDialect().GetCreateHistoryQuery(l.schema, withPostgraphile) _, err := l.ExecContext(ctx, query) return err } diff --git a/db/dialect.go b/db/dialect.go index ed708a7..ea1dae0 100644 --- a/db/dialect.go +++ b/db/dialect.go @@ -18,7 +18,7 @@ func (e UnknownDriverError) Error() string { type dialect interface { GetCreateCursorQuery(schema string, withPostgraphile bool) string - GetCreateHistoryQuery(schema string) string + GetCreateHistoryQuery(schema string, withPostgraphile bool) string ExecuteSetupScript(ctx context.Context, l *Loader, schemaSql string) error DriverSupportRowsAffected() bool GetUpdateCursorQuery(table, moduleHash string, cursor *sink.Cursor, block_num uint64, block_id string) string diff --git a/db/dialect_clickhouse.go b/db/dialect_clickhouse.go index f8c62b3..cccad24 100644 --- a/db/dialect_clickhouse.go +++ b/db/dialect_clickhouse.go @@ -97,7 +97,7 @@ func (d clickhouseDialect) GetCreateCursorQuery(schema string, withPostgraphile `), EscapeIdentifier(schema), EscapeIdentifier(CURSORS_TABLE)) } -func (d clickhouseDialect) GetCreateHistoryQuery(schema string) string { +func (d clickhouseDialect) GetCreateHistoryQuery(schema string, withPostgraphile bool) string { panic("clickhouse does not support reorg management") } diff --git a/db/dialect_postgres.go b/db/dialect_postgres.go index 842c973..426817d 100644 --- a/db/dialect_postgres.go +++ b/db/dialect_postgres.go @@ -189,7 +189,7 @@ func (d postgresDialect) GetCreateCursorQuery(schema string, withPostgraphile bo return out } -func (d postgresDialect) GetCreateHistoryQuery(schema string) string { +func (d postgresDialect) GetCreateHistoryQuery(schema string, withPostgraphile bool) string { out := fmt.Sprintf(cli.Dedent(` create table if not exists %s ( @@ -203,6 +203,10 @@ func (d postgresDialect) GetCreateHistoryQuery(schema 
string) string { `), d.historyTable(schema), ) + if withPostgraphile { + out += fmt.Sprintf("COMMENT ON TABLE %s.%s IS E'@omit';", + EscapeIdentifier(schema), EscapeIdentifier(HISTORY_TABLE)) + } return out } From badd052a5f61d2f31b3db6e4169b2921975e3a0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Duchesneau?= Date: Tue, 7 Nov 2023 11:41:25 -0500 Subject: [PATCH 10/10] add --system-tables-only on setup command --- CHANGELOG.md | 2 +- cmd/substreams-sink-sql/setup.go | 9 ++++++++- db/db.go | 9 +++++---- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8d36213..b67a097 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,7 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * A change in your SQL schema may be required to keep existing substreams:SQL integrations working: * The presence of a primary key (single key or composite) is now *MANDATORY* on every table. * The `sf.substreams.sink.database.v1.TableChange` message, generated inside substreams, must now exactly match its primary key with the one in the SQL schema. - * You will need to re-run `setup` on your existing PostgreSQL databases to add the `substreams_history` table. + * You will need to re-run `setup` on your existing PostgreSQL databases to add the `substreams_history` table. You can use the new `--system-tables-only` flag to perform only that. 
* Since reorgs management is not yet supported on Clickhouse, users will have to set `--undo-buffer-size` to a non-zero value (`12` was the previous default) diff --git a/cmd/substreams-sink-sql/setup.go b/cmd/substreams-sink-sql/setup.go index f779562..2d6a04f 100644 --- a/cmd/substreams-sink-sql/setup.go +++ b/cmd/substreams-sink-sql/setup.go @@ -19,6 +19,7 @@ var sinkSetupCmd = Command(sinkSetupE, ExactArgs(2), Flags(func(flags *pflag.FlagSet) { flags.Bool("postgraphile", false, "Will append the necessary 'comments' on cursors table to fully support postgraphile") + flags.Bool("system-tables-only", false, "will only create/update the system tables (cursors, substreams_history) and ignore the schema from the manifest") flags.Bool("ignore-duplicate-table-errors", false, "[Dev] Use this if you want to ignore duplicate table errors, take caution that this means the 'schemal.sql' file will not have run fully!") }), ) @@ -29,6 +30,7 @@ func sinkSetupE(cmd *cobra.Command, args []string) error { dsn := args[0] manifestPath := args[1] ignoreDuplicateTableErrors := sflags.MustGetBool(cmd, "ignore-duplicate-table-errors") + systemTableOnly := sflags.MustGetBool(cmd, "system-tables-only") reader, err := manifest.NewReader(manifestPath) if err != nil { @@ -49,7 +51,12 @@ func sinkSetupE(cmd *cobra.Command, args []string) error { return fmt.Errorf("new psql loader: %w", err) } - err = dbLoader.Setup(ctx, []byte(sinkConfig.Schema), sflags.MustGetBool(cmd, "postgraphile")) + schema := sinkConfig.Schema + if systemTableOnly { + schema = "" + } + + err = dbLoader.Setup(ctx, schema, sflags.MustGetBool(cmd, "postgraphile")) if err != nil { if isDuplicateTableError(err) && ignoreDuplicateTableErrors { zlog.Info("received duplicate table error, script dit not executed succesfully completed") diff --git a/db/db.go b/db/db.go index acc81be..c843f14 100644 --- a/db/db.go +++ b/db/db.go @@ -277,10 +277,11 @@ func (l *Loader) MarshalLogObject(encoder zapcore.ObjectEncoder) error { // 
Setup creates the schema, cursors and history table where the is a byte array // taken from somewhere. -func (l *Loader) Setup(ctx context.Context, schemaBytes []byte, withPostgraphile bool) error { - schemaSql := string(schemaBytes) - if err := l.getDialect().ExecuteSetupScript(ctx, l, schemaSql); err != nil { - return fmt.Errorf("exec schema: %w", err) +func (l *Loader) Setup(ctx context.Context, schemaSql string, withPostgraphile bool) error { + if schemaSql != "" { + if err := l.getDialect().ExecuteSetupScript(ctx, l, schemaSql); err != nil { + return fmt.Errorf("exec schema: %w", err) + } } if err := l.setupCursorTable(ctx, withPostgraphile); err != nil {