From patchwork Wed Jul 17 00:56:09 2019
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Numan Siddique ovn-northd
to gracefully terminate.
pause
resume
is-paused
ovn-northd
will automatically take over.
+ + You may run multiple OVN DB servers in an OVN deployment with: +
ovn-northd
also deployed on all these nodes,
+ using unix ctl sockets to connect to the local OVN DB servers.
+ + In such deployments, the ovn-northd instances on the passive nodes will process + the DB changes and compute logical flows that are thrown out later, + because write transactions are not allowed by the passive ovsdb-servers. + This results in unnecessary CPU usage. +
+ +
+ With the help of the runtime management command pause
, you can
+ pause ovn-northd
on these nodes. When a passive node
+ becomes master, you can use the runtime management command
+ resume
to make ovn-northd
resume processing the
+ DB changes.
+
diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c index eb6c47cad..8e13901e0 100644 --- a/ovn/northd/ovn-northd.c +++ b/ovn/northd/ovn-northd.c @@ -52,6 +52,9 @@ VLOG_DEFINE_THIS_MODULE(ovn_northd); static unixctl_cb_func ovn_northd_exit; +static unixctl_cb_func ovn_northd_pause; +static unixctl_cb_func ovn_northd_resume; +static unixctl_cb_func ovn_northd_is_paused; struct northd_context { struct ovsdb_idl *ovnnb_idl; @@ -9182,6 +9185,7 @@ main(int argc, char *argv[]) struct unixctl_server *unixctl; int retval; bool exiting; + bool paused; fatal_ignore_sigpipe(); ovs_cmdl_proctitle_init(argc, argv); @@ -9196,6 +9200,10 @@ main(int argc, char *argv[]) exit(EXIT_FAILURE); } unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting); + unixctl_command_register("pause", "", 0, 0, ovn_northd_pause, &paused); + unixctl_command_register("resume", "", 0, 0, ovn_northd_resume, &paused); + unixctl_command_register("is-paused", "", 0, 0, ovn_northd_is_paused, + &paused); daemonize_complete(); @@ -9384,34 +9392,51 @@ main(int argc, char *argv[]) /* Main loop. */ exiting = false; + paused = false; while (!exiting) { - struct northd_context ctx = { - .ovnnb_idl = ovnnb_idl_loop.idl, - .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop), - .ovnsb_idl = ovnsb_idl_loop.idl, - .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop), - .sbrec_ha_chassis_grp_by_name = sbrec_ha_chassis_grp_by_name, - .sbrec_mcast_group_by_name_dp = sbrec_mcast_group_by_name_dp, - .sbrec_ip_mcast_by_dp = sbrec_ip_mcast_by_dp, - }; - - if (!had_lock && ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) { - VLOG_INFO("ovn-northd lock acquired. " - "This ovn-northd instance is now active."); - had_lock = true; - } else if (had_lock && !ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) { - VLOG_INFO("ovn-northd lock lost. 
" - "This ovn-northd instance is now on standby."); - had_lock = false; - } - - if (ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) { - ovn_db_run(&ctx, sbrec_chassis_by_name, &ovnsb_idl_loop); - if (ctx.ovnsb_txn) { - check_and_add_supported_dhcp_opts_to_sb_db(&ctx); - check_and_add_supported_dhcpv6_opts_to_sb_db(&ctx); - check_and_update_rbac(&ctx); + /* unixctl_server_run could modify the value of 'paused'. + * So store the value in local 'paused_' so that we run + * 'ovsdb_idl_loop_commit_and_wait() at the end of the loop. */ + bool paused_ = paused; + + if (!paused_) { + struct northd_context ctx = { + .ovnnb_idl = ovnnb_idl_loop.idl, + .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop), + .ovnsb_idl = ovnsb_idl_loop.idl, + .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop), + .sbrec_ha_chassis_grp_by_name = sbrec_ha_chassis_grp_by_name, + .sbrec_mcast_group_by_name_dp = sbrec_mcast_group_by_name_dp, + .sbrec_ip_mcast_by_dp = sbrec_ip_mcast_by_dp, + }; + + if (!had_lock && ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) { + VLOG_INFO("ovn-northd lock acquired. " + "This ovn-northd instance is now active."); + had_lock = true; + } else if (had_lock && !ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) { + VLOG_INFO("ovn-northd lock lost. " + "This ovn-northd instance is now on standby."); + had_lock = false; } + + if (ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) { + ovn_db_run(&ctx, sbrec_chassis_by_name, &ovnsb_idl_loop); + if (ctx.ovnsb_txn) { + check_and_add_supported_dhcp_opts_to_sb_db(&ctx); + check_and_add_supported_dhcpv6_opts_to_sb_db(&ctx); + check_and_update_rbac(&ctx); + } + } + } else { + /* ovn-northd is paused + * - we still want to handle any db updates and update the + * local IDL. Otherwise, when it is resumed, the local IDL + * copy will be out of sync. + * - but we don't want to create any txns. 
+ * */ + ovsdb_idl_run(ovnnb_idl_loop.idl); + ovsdb_idl_run(ovnsb_idl_loop.idl); } unixctl_server_run(unixctl); @@ -9419,8 +9444,16 @@ main(int argc, char *argv[]) if (exiting) { poll_immediate_wake(); } - ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop); - ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop); + + if (!paused_) { + ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop); + ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop); + } else { + /* ovn-northd is paused, but we still want to wake up for any db + * updates. */ + ovsdb_idl_wait(ovnnb_idl_loop.idl); + ovsdb_idl_wait(ovnsb_idl_loop.idl); + } poll_block(); if (should_service_stop()) { @@ -9445,3 +9478,35 @@ ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED, unixctl_command_reply(conn, NULL); } + +static void +ovn_northd_pause(struct unixctl_conn *conn, int argc OVS_UNUSED, + const char *argv[] OVS_UNUSED, void *pause_) +{ + bool *pause = pause_; + *pause = true; + + unixctl_command_reply(conn, NULL); +} + +static void +ovn_northd_resume(struct unixctl_conn *conn, int argc OVS_UNUSED, + const char *argv[] OVS_UNUSED, void *pause_) +{ + bool *pause = pause_; + *pause = false; + + unixctl_command_reply(conn, NULL); +} + +static void +ovn_northd_is_paused(struct unixctl_conn *conn, int argc OVS_UNUSED, + const char *argv[] OVS_UNUSED, void *paused_) +{ + bool *paused = paused_; + if (*paused) { + unixctl_command_reply(conn, "true"); + } else { + unixctl_command_reply(conn, "false"); + } +} diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at index 62e58fd0e..0dea04edc 100644 --- a/tests/ovn-northd.at +++ b/tests/ovn-northd.at @@ -898,3 +898,41 @@ as northd OVS_APP_EXIT_AND_WAIT([ovn-northd]) AT_CLEANUP + +AT_SETUP([ovn -- ovn-northd pause and resume]) +AT_SKIP_IF([test $HAVE_PYTHON = no]) +ovn_start + +AT_CHECK([test xfalse = x`as northd ovs-appctl -t ovn-northd is-paused`]) + +ovn-nbctl ls-add sw0 + +OVS_WAIT_UNTIL([ + ovn-sbctl lflow-list sw0 + test 0 = $?]) + +ovn-nbctl ls-del sw0 +OVS_WAIT_UNTIL([ 
+ ovn-sbctl lflow-list sw0 + test 1 = $?]) + +# Now pause the ovn-northd +as northd ovs-appctl -t ovn-northd pause +AT_CHECK([test xtrue = x`as northd ovs-appctl -t ovn-northd is-paused`]) + +ovn-nbctl ls-add sw0 + +# There should be no logical flows for sw0 datapath. +OVS_WAIT_UNTIL([ + ovn-sbctl lflow-list sw0 + test 1 = $?]) + +# Now resume ovn-northd +as northd ovs-appctl -t ovn-northd resume +AT_CHECK([test xfalse = x`as northd ovs-appctl -t ovn-northd is-paused`]) + +OVS_WAIT_UNTIL([ + ovn-sbctl lflow-list sw0 + test 0 = $?]) + +AT_CLEANUP