Skip to content

Commit

Permalink
SAL: allow grace period to be lifted early if all clients have sent RECLAIM_COMPLETE

Browse files Browse the repository at this point in the history

Keep track of the number of clients on the recovery list, and the number
on the list that have sent a RECLAIM_COMPLETE.

v4.0 clients never send a RECLAIM_COMPLETE, so when those two counts are
equal we know that every client on the list is a v4.1 client that has
finished reclaiming, and the grace period can be lifted immediately.

Since we don't have a reliable way to detect whether any SM_NOTIFY requests
were sent in conjunction with this restart, we only lift the grace period
early if NLM is completely disabled (i.e. the Enable_NLM setting is off).

Change-Id: Ie720da438220f7115d4458b4b995753d480d6491
Signed-off-by: Jeff Layton <[email protected]>
  • Loading branch information
jtlayton committed Nov 10, 2017
1 parent 476c206 commit 302ab52
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 5 deletions.
2 changes: 2 additions & 0 deletions src/Protocols/NFS/nfs4_op_reclaim_complete.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
#include "nfs_proto_functions.h"
#include "nfs_file_handle.h"
#include "sal_data.h"
#include "sal_functions.h"

/**
*
Expand Down Expand Up @@ -101,6 +102,7 @@ int nfs4_op_reclaim_complete(struct nfs_argop4 *op, compound_data_t *data,
if (!arg_RECLAIM_COMPLETE4->rca_one_fs) {
data->session->clientid_record->cid_cb.v41.
cid_reclaim_complete = true;
nfs4_recovery_reclaim_complete(data->session->clientid_record);
}

return res_RECLAIM_COMPLETE4->rcr_status;
Expand Down
72 changes: 67 additions & 5 deletions src/SAL/nfs4_recovery.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ time_t current_grace;
pthread_mutex_t grace_mutex = PTHREAD_MUTEX_INITIALIZER; /*< Mutex */
struct glist_head clid_list = GLIST_HEAD_INIT(clid_list); /*< Clients */
struct nfs4_recovery_backend *recovery_backend;
static int clid_count; /* protected by grace_mutex */
static int32_t reclaim_completes; /* atomic */

static void nfs4_recovery_load_clids_nolock(nfs_grace_start_t *gsp);
static void nfs_release_nlm_state(char *release_ip);
Expand All @@ -56,7 +58,9 @@ clid_entry_t *nfs4_add_clid_entry(char *cl_name)
clid_entry_t *new_ent = gsh_malloc(sizeof(clid_entry_t));

strcpy(new_ent->cl_name, cl_name);
new_ent->cl_reclaim_complete = false;
glist_add(&clid_list, &new_ent->cl_list);
++clid_count;
return new_ent;
}

Expand All @@ -78,6 +82,7 @@ void nfs4_cleanup_clid_entrys(void)
cl_list)) != NULL) {
glist_del(&clid_entry->cl_list);
gsh_free(clid_entry);
--clid_count;
}
}

Expand Down Expand Up @@ -149,19 +154,34 @@ int nfs_in_grace(void)
{
int in_grace;
static int last_grace = -1;
int32_t rc_count = 0;

if (nfs_param.nfsv4_param.graceless)
return 0;

in_grace = ((atomic_fetch_time_t(&current_grace) +
nfs_param.nfsv4_param.grace_period) > time(NULL));

in_grace = 1;

/*
* If we know there are no NLM clients, then we can consider the grace
* period done when all previous clients have sent a RECLAIM_COMPLETE.
*/
rc_count = atomic_fetch_int32_t(&reclaim_completes);
if (!nfs_param.core_param.enable_NLM)
in_grace = (rc_count != clid_count);

/* Otherwise, wait for the timeout */
if (in_grace)
in_grace = ((atomic_fetch_time_t(&current_grace) +
nfs_param.nfsv4_param.grace_period) > time(NULL));

if (in_grace != last_grace) {
LogEvent(COMPONENT_STATE, "NFS Server Now %s",
LogEvent(COMPONENT_STATE, "NFS Server Now %s ",
in_grace ? "IN GRACE" : "NOT IN GRACE");
last_grace = in_grace;
} else if (in_grace) {
LogDebug(COMPONENT_STATE, "NFS Server IN GRACE");
LogDebug(COMPONENT_STATE, "NFS Server IN GRACE (%d:%d)",
clid_count, rc_count);
}

return in_grace;
Expand Down Expand Up @@ -213,6 +233,48 @@ static bool check_clid(nfs_client_id_t *clientid, clid_entry_t *clid_ent)
return ret;
}

/**
 * @brief Mark a client's recovery-list entry as having sent RECLAIM_COMPLETE
 *
 * Search the recovery clid_list for an entry matching @c clientid. If one is
 * found that has not yet been marked, set its cl_reclaim_complete flag and
 * bump the global reclaim_completes counter, so that nfs_in_grace() can lift
 * the grace period early once every listed client has reclaimed.
 *
 * Fix: clid_count is protected by grace_mutex (see its declaration), so the
 * original lockless early-return check was an unsynchronized read. Take the
 * mutex before inspecting it.
 *
 * @param[in] clientid	Client that sent the RECLAIM_COMPLETE
 */
void nfs4_recovery_reclaim_complete(nfs_client_id_t *clientid)
{
	struct glist_head *node;

	PTHREAD_MUTEX_lock(&grace_mutex);

	/* If there were no clients at the time of restart, nothing to do */
	if (clid_count == 0) {
		PTHREAD_MUTEX_unlock(&grace_mutex);
		return;
	}

	/*
	 * Loop through the list and try to find a matching client that hasn't
	 * yet sent a reclaim_complete. If we find it, mark its clid_ent
	 * record, and increment the reclaim_completes counter.
	 */
	glist_for_each(node, &clid_list) {
		clid_entry_t *clid_ent = glist_entry(node, clid_entry_t,
						     cl_list);

		/* Skip any that have already sent a reclaim_complete */
		if (clid_ent->cl_reclaim_complete)
			continue;

		if (check_clid(clientid, clid_ent)) {
			if (isDebug(COMPONENT_CLIENTID)) {
				char str[LOG_BUFF_LEN] = "\0";
				struct display_buffer dspbuf = {
					sizeof(str), str, str};

				display_client_id_rec(&dspbuf, clientid);

				LogFullDebug(COMPONENT_CLIENTID,
					     "RECLAIM_COMPLETE for %s",
					     str);
			}
			clid_ent->cl_reclaim_complete = true;
			atomic_inc_int32_t(&reclaim_completes);
			break;
		}
	}
	PTHREAD_MUTEX_unlock(&grace_mutex);
}

/**
* @brief Determine whether or not this client may reclaim state
*
Expand All @@ -230,7 +292,7 @@ void nfs4_chk_clid_impl(nfs_client_id_t *clientid, clid_entry_t **clid_ent_arg)
clientid->cid_clientid);

/* If there were no clients at time of restart, we're done */
if (glist_empty(&clid_list))
if (clid_count == 0)
return;

/*
Expand Down
1 change: 1 addition & 0 deletions src/include/sal_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ typedef struct rdel_fh {
/* One entry on the recovery list: a client known at the time of restart */
typedef struct clid_entry {
	struct glist_head cl_list;	/*< Link in the list */
	struct glist_head cl_rfh_list;	/*< rdel_fh entries — presumably
					    filehandles pending removal;
					    confirm against users of this
					    list */
	bool cl_reclaim_complete;	/*< Client has sent a
					    RECLAIM_COMPLETE */
	char cl_name[PATH_MAX];	/*< Client name */
} clid_entry_t;

Expand Down
1 change: 1 addition & 0 deletions src/include/sal_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -968,6 +968,7 @@ void nfs4_start_grace(nfs_grace_start_t *gsp);
int nfs_in_grace(void);
void nfs4_add_clid(nfs_client_id_t *);
void nfs4_rm_clid(nfs_client_id_t *);
void nfs4_recovery_reclaim_complete(nfs_client_id_t *clientid);
void nfs4_chk_clid(nfs_client_id_t *);
void nfs4_recovery_load_clids(nfs_grace_start_t *gsp);
void nfs4_recovery_cleanup(void);
Expand Down

0 comments on commit 302ab52

Please sign in to comment.