From 0db4d555e98f90da320a802be67b75757eb78abc Mon Sep 17 00:00:00 2001 From: Igor Ryzhov Date: Sat, 3 Feb 2024 00:42:58 +0200 Subject: [PATCH 1/2] mgmtd, vtysh: fix possible conflict when reading the config When FRR starts, after mgmtd is initialized, backend clients connect to it and request their config. To supply the config, mgmtd creates a configuration transaction. At the same time, `vtysh -b` tries to read the startup config and configure mgmtd, which also creates a configuration transaction. If these two actions happen at the exact same time, there's a conflict between them, because only a single configuration transaction is allowed. Because of that, vtysh fails and the config is completely ignored. When starting the config reading, vtysh locks the candidate and running datastores in mgmtd. This commit adds locking of the running datastore when initializing the backend client. This allows vtysh to retry locking and read the config only once the lock is acquired, instead of failing. This change also prevents the running datastore from being changed during initialization of backend clients. Such a change could lead to a desynchronized state between mgmtd and backends. 
Signed-off-by: Igor Ryzhov --- mgmtd/mgmt_fe_adapter.c | 3 --- mgmtd/mgmt_txn.c | 28 +++++++++++++++++++++++++--- vtysh/vtysh_config.c | 8 +++++++- 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/mgmtd/mgmt_fe_adapter.c b/mgmtd/mgmt_fe_adapter.c index 001da7680b8a..ec8e7733548b 100644 --- a/mgmtd/mgmt_fe_adapter.c +++ b/mgmtd/mgmt_fe_adapter.c @@ -711,9 +711,6 @@ mgmt_fe_session_handle_setcfg_req_msg(struct mgmt_fe_session_ctx *session, } if (session->cfg_txn_id == MGMTD_TXN_ID_NONE) { - /* as we have the lock no-one else should have a config txn */ - assert(!mgmt_config_txn_in_progress()); - /* Start a CONFIG Transaction (if not started already) */ session->cfg_txn_id = mgmt_create_txn(session->session_id, MGMTD_TXN_TYPE_CONFIG); diff --git a/mgmtd/mgmt_txn.c b/mgmtd/mgmt_txn.c index df2a1d852d09..664f42f4bad5 100644 --- a/mgmtd/mgmt_txn.c +++ b/mgmtd/mgmt_txn.c @@ -105,6 +105,7 @@ struct mgmt_commit_cfg_req { uint8_t abort : 1; uint8_t implicit : 1; uint8_t rollback : 1; + uint8_t init : 1; /* Track commit phases */ enum mgmt_commit_phase phase; @@ -750,6 +751,14 @@ static int mgmt_txn_send_commit_cfg_reply(struct mgmt_txn_ctx *txn, mgmt_history_rollback_complete(success); } + if (txn->commit_cfg_req->req.commit_cfg.init) { + /* + * This is the backend init request. + * We need to unlock the running datastore. 
+ */ + mgmt_ds_unlock(txn->commit_cfg_req->req.commit_cfg.dst_ds_ctx); + } + txn->commit_cfg_req->req.commit_cfg.cmt_stats = NULL; mgmt_txn_req_free(&txn->commit_cfg_req); @@ -2081,15 +2090,26 @@ int mgmt_txn_notify_be_adapter_conn(struct mgmt_be_client_adapter *adapter, struct mgmt_commit_cfg_req *cmtcfg_req; static struct mgmt_commit_stats dummy_stats; struct nb_config_cbs *adapter_cfgs = NULL; + struct mgmt_ds_ctx *ds_ctx; memset(&dummy_stats, 0, sizeof(dummy_stats)); if (connect) { - /* Get config for this single backend client */ + ds_ctx = mgmt_ds_get_ctx_by_id(mm, MGMTD_DS_RUNNING); + assert(ds_ctx); + + /* + * Lock the running datastore to prevent any changes while we + * are initializing the backend. + */ + if (mgmt_ds_lock(ds_ctx, 0) != 0) + return -1; + /* Get config for this single backend client */ mgmt_be_get_adapter_config(adapter, &adapter_cfgs); if (!adapter_cfgs || RB_EMPTY(nb_config_cbs, adapter_cfgs)) { SET_FLAG(adapter->flags, MGMTD_BE_ADAPTER_FLAGS_CFG_SYNCED); + mgmt_ds_unlock(ds_ctx); return 0; } @@ -2101,6 +2121,7 @@ int mgmt_txn_notify_be_adapter_conn(struct mgmt_be_client_adapter *adapter, if (!txn) { __log_err("Failed to create CONFIG Transaction for downloading CONFIGs for client '%s'", adapter->name); + mgmt_ds_unlock(ds_ctx); nb_config_diff_del_changes(adapter_cfgs); return -1; } @@ -2114,10 +2135,11 @@ int mgmt_txn_notify_be_adapter_conn(struct mgmt_be_client_adapter *adapter, txn_req = mgmt_txn_req_alloc(txn, 0, MGMTD_TXN_PROC_COMMITCFG); txn_req->req.commit_cfg.src_ds_id = MGMTD_DS_NONE; txn_req->req.commit_cfg.src_ds_ctx = 0; - txn_req->req.commit_cfg.dst_ds_id = MGMTD_DS_NONE; - txn_req->req.commit_cfg.dst_ds_ctx = 0; + txn_req->req.commit_cfg.dst_ds_id = MGMTD_DS_RUNNING; + txn_req->req.commit_cfg.dst_ds_ctx = ds_ctx; txn_req->req.commit_cfg.validate_only = false; txn_req->req.commit_cfg.abort = false; + txn_req->req.commit_cfg.init = true; txn_req->req.commit_cfg.cmt_stats = &dummy_stats; txn_req->req.commit_cfg.cfg_chgs = 
adapter_cfgs; diff --git a/vtysh/vtysh_config.c b/vtysh/vtysh_config.c index 888f6a8c2162..15bcd343c979 100644 --- a/vtysh/vtysh_config.c +++ b/vtysh/vtysh_config.c @@ -616,7 +616,13 @@ static int vtysh_read_file(FILE *confp, bool dry_run) vty->node = CONFIG_NODE; vtysh_execute_no_pager("enable"); - vtysh_execute_no_pager("conf term file-lock"); + /* + * When reading the config, we need to wait until the lock is acquired. + * If we ignore the failure and continue without the lock, the config + * will be fully ignored. + */ + while (vtysh_execute_no_pager("conf term file-lock") == CMD_WARNING_CONFIG_FAILED) + usleep(100000); vty->vtysh_file_locked = true; if (!dry_run) From 2574f03a13d6fdd6580015c527c8d4b151579a70 Mon Sep 17 00:00:00 2001 From: Igor Ryzhov Date: Sat, 3 Feb 2024 01:15:46 +0200 Subject: [PATCH 2/2] vtysh: remove resync workaround when exiting to config node When exiting from a level below the config node, like `router rip`, vtysh executes a resync by sending "end" and "conf term [file-lock]" commands to all the daemons. As stated in the description comment, it's done "in case one of the daemons is somewhere else". I don't think this actually ever happens, but even if it does, it is a bug in a daemon that needs to be fixed. This resync was okay before the introduction of mgmtd, but now it unlocks and re-locks the datastores during the configuration reading process, which can lead to the failure explained in the previous commit. 
Signed-off-by: Igor Ryzhov --- lib/vty.h | 4 ---- vtysh/vtysh.c | 30 +----------------------------- vtysh/vtysh_config.c | 2 -- 3 files changed, 1 insertion(+), 35 deletions(-) diff --git a/lib/vty.h b/lib/vty.h index 06973da9161f..a59ac7a652ee 100644 --- a/lib/vty.h +++ b/lib/vty.h @@ -232,10 +232,6 @@ struct vty { uintptr_t mgmt_req_pending_data; bool mgmt_locked_candidate_ds; bool mgmt_locked_running_ds; - /* Need to track when we file-lock in vtysh to re-lock on end/conf t - * workaround - */ - bool vtysh_file_locked; }; static inline void vty_push_context(struct vty *vty, int node, uint64_t id) diff --git a/vtysh/vtysh.c b/vtysh/vtysh.c index 4cb46b87a5ea..3290c8d54afa 100644 --- a/vtysh/vtysh.c +++ b/vtysh/vtysh.c @@ -1669,7 +1669,6 @@ static int vtysh_end(void) /* Nothing to do. */ break; default: - vty->vtysh_file_locked = false; vty->node = ENABLE_NODE; break; } @@ -2393,23 +2392,12 @@ DEFUNSH(VTYSH_REALLYALL, vtysh_disable, vtysh_disable_cmd, "disable", } DEFUNSH(VTYSH_REALLYALL, vtysh_config_terminal, vtysh_config_terminal_cmd, - "configure [terminal]", - "Configuration from vty interface\n" - "Configuration terminal\n") -{ - vty->node = CONFIG_NODE; - return CMD_SUCCESS; -} - -DEFUNSH(VTYSH_REALLYALL, vtysh_config_terminal_file_lock, - vtysh_config_terminal_file_lock_cmd, - "configure terminal file-lock", + "configure [terminal [file-lock]]", "Configuration from vty interface\n" "Configuration terminal\n" "Configuration with locked datastores\n") { vty->node = CONFIG_NODE; - vty->vtysh_file_locked = true; return CMD_SUCCESS; } @@ -2424,21 +2412,6 @@ static int vtysh_exit(struct vty *vty) if (cnode->parent_node) vty->node = cnode->parent_node; - if (vty->node == CONFIG_NODE) { - bool locked = vty->vtysh_file_locked; - - /* resync in case one of the daemons is somewhere else */ - vtysh_execute("end"); - /* NOTE: a rather expensive thing to do, can we avoid it? 
*/ - - if (locked) - vtysh_execute("configure terminal file-lock"); - else - vtysh_execute("configure terminal"); - } else if (vty->node == ENABLE_NODE) { - vty->vtysh_file_locked = false; - } - return CMD_SUCCESS; } @@ -5125,7 +5098,6 @@ void vtysh_init_vty(void) if (!user_mode) install_element(VIEW_NODE, &vtysh_enable_cmd); install_element(ENABLE_NODE, &vtysh_config_terminal_cmd); - install_element(ENABLE_NODE, &vtysh_config_terminal_file_lock_cmd); install_element(ENABLE_NODE, &vtysh_disable_cmd); /* "exit" command. */ diff --git a/vtysh/vtysh_config.c b/vtysh/vtysh_config.c index 15bcd343c979..c207e4d42759 100644 --- a/vtysh/vtysh_config.c +++ b/vtysh/vtysh_config.c @@ -623,7 +623,6 @@ static int vtysh_read_file(FILE *confp, bool dry_run) */ while (vtysh_execute_no_pager("conf term file-lock") == CMD_WARNING_CONFIG_FAILED) usleep(100000); - vty->vtysh_file_locked = true; if (!dry_run) vtysh_execute_no_pager("XFRR_start_configuration"); @@ -635,7 +634,6 @@ static int vtysh_read_file(FILE *confp, bool dry_run) vtysh_execute_no_pager("XFRR_end_configuration"); vtysh_execute_no_pager("end"); - vty->vtysh_file_locked = false; vtysh_execute_no_pager("disable"); vty_close(vty);