From 76e9d9f5ded5fdc8f670a4014353e9504689e902 Mon Sep 17 00:00:00 2001 From: Albert Chu Date: Tue, 27 Aug 2024 11:57:32 -0700 Subject: [PATCH] kvs: call content.flush before checkpoint Problem: When the KVS module is unloaded, a checkpoint of the root reference is attempted. However, a content.flush is not done beforehand. This could result in an invalid checkpoint reference as data is not guaranteed to be flushed to the backing store. Solution: Call content.flush before checkpointing. Fixes #6237 --- src/modules/kvs/kvs.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/src/modules/kvs/kvs.c b/src/modules/kvs/kvs.c index 6b3b8a2179c9..06652e000ef7 100644 --- a/src/modules/kvs/kvs.c +++ b/src/modules/kvs/kvs.c @@ -2914,15 +2914,28 @@ static int checkpoint_get (flux_t *h, char *buf, size_t len, int *seq) */ static int checkpoint_put (flux_t *h, const char *rootref, int rootseq) { - flux_future_t *f = NULL; + flux_future_t *f1 = NULL; + flux_future_t *f2 = NULL; int rv = -1; - if (!(f = kvs_checkpoint_commit (h, NULL, rootref, rootseq, 0, 0)) - || flux_rpc_get (f, NULL) < 0) + /* first must ensure all content is flushed */ + if (!(f1 = flux_rpc (h, "content.flush", NULL, 0, 0)) + || flux_rpc_get (f1, NULL) < 0) { + /* fallthrough to kvs_checkpoint_commit(), ENOSYS may be due + * to no backing store, but checkpoint can still be done to + * content cache. + */ + if (errno != ENOSYS) + goto error; + } + + if (!(f2 = kvs_checkpoint_commit (h, NULL, rootref, rootseq, 0, 0)) + || flux_rpc_get (f2, NULL) < 0) goto error; rv = 0; error: - flux_future_destroy (f); + flux_future_destroy (f1); + flux_future_destroy (f2); return rv; }