From 9f4585508ad7ed49d3737bfedab81a9ce5f14e89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=ADctor=20Rold=C3=A1n=20Betancort?= Date: Mon, 8 Apr 2024 10:47:04 +0100 Subject: [PATCH] introduces a faster query to tuple GC we've noticed that even after https://github.com/authzed/spicedb/pull/1550 was introduced and made the nested GC SELECT query faster, deletions are still causing spikes in postgres and Aurora instances. The proposed query appeared to be one order of magnitude cheaper than the current query on a test PG 15.6 with 5M dead tuples and 5M alive tuples. --- internal/datastore/postgres/gc.go | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/internal/datastore/postgres/gc.go b/internal/datastore/postgres/gc.go index 320f7bd81a..7414a838bf 100644 --- a/internal/datastore/postgres/gc.go +++ b/internal/datastore/postgres/gc.go @@ -15,20 +15,10 @@ import ( var ( _ common.GarbageCollector = (*pgDatastore)(nil) - relationTuplePKCols = []string{ - colNamespace, - colObjectID, - colRelation, - colUsersetNamespace, - colUsersetObjectID, - colUsersetRelation, - colCreatedXid, - colDeletedXid, - } - - namespacePKCols = []string{colNamespace, colCreatedXid, colDeletedXid} - - transactionPKCols = []string{colXID} + // we are using "tableoid" to globally identify the row through the "ctid" in partitioned environments + // as it's not guaranteed 2 rows in different partitions have different "ctid" values + // See https://www.postgresql.org/docs/current/ddl-system-columns.html#DDL-SYSTEM-COLUMNS-TABLEOID + gcPKCols = []string{"tableoid", "ctid"} ) func (pgd *pgDatastore) HasGCRun() bool { @@ -88,7 +78,7 @@ func (pgd *pgDatastore) DeleteBeforeTx(ctx context.Context, txID datastore.Revis removed.Relationships, err = pgd.batchDelete( ctx, tableTuple, - relationTuplePKCols, + gcPKCols, sq.Lt{colDeletedXid: minTxAlive}, ) if err != nil { @@ -102,7 +92,7 @@ func (pgd *pgDatastore) DeleteBeforeTx(ctx context.Context, txID datastore.Revis 
removed.Transactions, err = pgd.batchDelete( ctx, tableTransaction, - transactionPKCols, + gcPKCols, sq.Lt{colXID: minTxAlive}, ) if err != nil { @@ -113,7 +103,7 @@ func (pgd *pgDatastore) DeleteBeforeTx(ctx context.Context, txID datastore.Revis removed.Namespaces, err = pgd.batchDelete( ctx, tableNamespace, - namespacePKCols, + gcPKCols, sq.Lt{colDeletedXid: minTxAlive}, ) if err != nil { @@ -135,7 +125,6 @@ func (pgd *pgDatastore) batchDelete( } pkColsExpression := strings.Join(pkCols, ", ") - query := fmt.Sprintf(`WITH rows AS (%[1]s) DELETE FROM %[2]s WHERE (%[3]s) IN (SELECT %[3]s FROM rows);