@@ -70,6 +70,23 @@
<dd>
Causes <code>ovn-northd</code> to gracefully terminate.
</dd>
+
+ <dt><code>pause</code></dt>
+ <dd>
+ Pauses the ovn-northd operation from processing any Northbound and
+ Southbound database changes.
+ </dd>
+
+ <dt><code>resume</code></dt>
+ <dd>
+ Resumes the ovn-northd operation to process Northbound and
+ Southbound database contents and generate logical flows.
+ </dd>
+
+ <dt><code>is-paused</code></dt>
+ <dd>
+ Returns "true" if ovn-northd is currently paused, "false" otherwise.
+ </dd>
</dl>
</p>
@@ -82,6 +99,37 @@
of <code>ovn-northd</code> will automatically take over.
</p>
+ <h2> Active-Standby with multiple OVN DB servers</h2>
+ <p>
+ You may run multiple OVN DB servers in an OVN deployment with:
+ <ul>
+ <li>
+ OVN DB servers deployed in active/passive mode with one active
+ and multiple passive ovsdb-servers.
+ </li>
+
+ <li>
+ <code>ovn-northd</code> also deployed on all these nodes,
+ using unix ctl sockets to connect to the local OVN DB servers.
+ </li>
+ </ul>
+ </p>
+
+ <p>
+ In such deployments, the ovn-northds on the passive nodes will process
+ the DB changes and compute logical flows to be thrown out later,
+ because write transactions are not allowed by the passive ovsdb-servers.
+ It results in unnecessary CPU usage.
+ </p>
+
+ <p>
+ With the help of runtime management command <code>pause</code>, you can
+ pause <code>ovn-northd</code> on these nodes. When a passive node
+ becomes master, you can use the runtime management command
+ <code>resume</code> to resume the <code>ovn-northd</code> to process the
+ DB changes.
+ </p>
+
<h1>Logical Flow Table Structure</h1>
<p>
@@ -52,6 +52,9 @@
VLOG_DEFINE_THIS_MODULE(ovn_northd);
static unixctl_cb_func ovn_northd_exit;
+static unixctl_cb_func ovn_northd_pause;
+static unixctl_cb_func ovn_northd_resume;
+static unixctl_cb_func ovn_northd_is_paused;
struct northd_context {
struct ovsdb_idl *ovnnb_idl;
@@ -9182,6 +9185,7 @@ main(int argc, char *argv[])
struct unixctl_server *unixctl;
int retval;
bool exiting;
+ bool paused;
fatal_ignore_sigpipe();
ovs_cmdl_proctitle_init(argc, argv);
@@ -9196,6 +9200,10 @@ main(int argc, char *argv[])
exit(EXIT_FAILURE);
}
unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);
+ unixctl_command_register("pause", "", 0, 0, ovn_northd_pause, &paused);
+ unixctl_command_register("resume", "", 0, 0, ovn_northd_resume, &paused);
+ unixctl_command_register("is-paused", "", 0, 0, ovn_northd_is_paused,
+ &paused);
daemonize_complete();
@@ -9384,34 +9392,51 @@ main(int argc, char *argv[])
/* Main loop. */
exiting = false;
+ paused = false;
while (!exiting) {
- struct northd_context ctx = {
- .ovnnb_idl = ovnnb_idl_loop.idl,
- .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
- .ovnsb_idl = ovnsb_idl_loop.idl,
- .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
- .sbrec_ha_chassis_grp_by_name = sbrec_ha_chassis_grp_by_name,
- .sbrec_mcast_group_by_name_dp = sbrec_mcast_group_by_name_dp,
- .sbrec_ip_mcast_by_dp = sbrec_ip_mcast_by_dp,
- };
-
- if (!had_lock && ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
- VLOG_INFO("ovn-northd lock acquired. "
- "This ovn-northd instance is now active.");
- had_lock = true;
- } else if (had_lock && !ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
- VLOG_INFO("ovn-northd lock lost. "
- "This ovn-northd instance is now on standby.");
- had_lock = false;
- }
-
- if (ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
- ovn_db_run(&ctx, sbrec_chassis_by_name, &ovnsb_idl_loop);
- if (ctx.ovnsb_txn) {
- check_and_add_supported_dhcp_opts_to_sb_db(&ctx);
- check_and_add_supported_dhcpv6_opts_to_sb_db(&ctx);
- check_and_update_rbac(&ctx);
+ /* unixctl_server_run could modify the value of 'paused'.
+ * So store the value in local 'paused_' so that we run
+ * 'ovsdb_idl_loop_commit_and_wait() at the end of the loop. */
+ bool paused_ = paused;
+
+ if (!paused_) {
+ struct northd_context ctx = {
+ .ovnnb_idl = ovnnb_idl_loop.idl,
+ .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
+ .ovnsb_idl = ovnsb_idl_loop.idl,
+ .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
+ .sbrec_ha_chassis_grp_by_name = sbrec_ha_chassis_grp_by_name,
+ .sbrec_mcast_group_by_name_dp = sbrec_mcast_group_by_name_dp,
+ .sbrec_ip_mcast_by_dp = sbrec_ip_mcast_by_dp,
+ };
+
+ if (!had_lock && ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
+ VLOG_INFO("ovn-northd lock acquired. "
+ "This ovn-northd instance is now active.");
+ had_lock = true;
+ } else if (had_lock && !ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
+ VLOG_INFO("ovn-northd lock lost. "
+ "This ovn-northd instance is now on standby.");
+ had_lock = false;
}
+
+ if (ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
+ ovn_db_run(&ctx, sbrec_chassis_by_name, &ovnsb_idl_loop);
+ if (ctx.ovnsb_txn) {
+ check_and_add_supported_dhcp_opts_to_sb_db(&ctx);
+ check_and_add_supported_dhcpv6_opts_to_sb_db(&ctx);
+ check_and_update_rbac(&ctx);
+ }
+ }
+ } else {
+ /* ovn-northd is paused
+ * - we still want to handle any db updates and update the
+ * local IDL. Otherwise, when it is resumed, the local IDL
+ * copy will be out of sync.
+ * - but we don't want to create any txns.
+ * */
+ ovsdb_idl_run(ovnnb_idl_loop.idl);
+ ovsdb_idl_run(ovnsb_idl_loop.idl);
}
unixctl_server_run(unixctl);
@@ -9419,8 +9444,16 @@ main(int argc, char *argv[])
if (exiting) {
poll_immediate_wake();
}
- ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
- ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);
+
+ if (!paused_) {
+ ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
+ ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);
+ } else {
+ /* ovn-northd is paused, but we still want to wake up for any db
+ * updates. */
+ ovsdb_idl_wait(ovnnb_idl_loop.idl);
+ ovsdb_idl_wait(ovnsb_idl_loop.idl);
+ }
poll_block();
if (should_service_stop()) {
@@ -9445,3 +9478,35 @@ ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
unixctl_command_reply(conn, NULL);
}
+
+static void
+ovn_northd_pause(struct unixctl_conn *conn, int argc OVS_UNUSED,
+ const char *argv[] OVS_UNUSED, void *pause_)
+{
+ bool *pause = pause_;
+ *pause = true;
+
+ unixctl_command_reply(conn, NULL);
+}
+
+static void
+ovn_northd_resume(struct unixctl_conn *conn, int argc OVS_UNUSED,
+ const char *argv[] OVS_UNUSED, void *pause_)
+{
+ bool *pause = pause_;
+ *pause = false;
+
+ unixctl_command_reply(conn, NULL);
+}
+
+static void
+ovn_northd_is_paused(struct unixctl_conn *conn, int argc OVS_UNUSED,
+ const char *argv[] OVS_UNUSED, void *paused_)
+{
+ bool *paused = paused_;
+ if (*paused) {
+ unixctl_command_reply(conn, "true");
+ } else {
+ unixctl_command_reply(conn, "false");
+ }
+}
@@ -898,3 +898,41 @@ as northd
OVS_APP_EXIT_AND_WAIT([ovn-northd])
AT_CLEANUP
+
+AT_SETUP([ovn -- ovn-northd pause and resume])
+AT_SKIP_IF([test $HAVE_PYTHON = no])
+ovn_start
+
+AT_CHECK([test xfalse = x`as northd ovs-appctl -t ovn-northd is-paused`])
+
+ovn-nbctl ls-add sw0
+
+OVS_WAIT_UNTIL([
+ ovn-sbctl lflow-list sw0
+ test 0 = $?])
+
+ovn-nbctl ls-del sw0
+OVS_WAIT_UNTIL([
+ ovn-sbctl lflow-list sw0
+ test 1 = $?])
+
+# Now pause the ovn-northd
+as northd ovs-appctl -t ovn-northd pause
+AT_CHECK([test xtrue = x`as northd ovs-appctl -t ovn-northd is-paused`])
+
+ovn-nbctl ls-add sw0
+
+# There should be no logical flows for sw0 datapath.
+OVS_WAIT_UNTIL([
+ ovn-sbctl lflow-list sw0
+ test 1 = $?])
+
+# Now resume ovn-northd
+as northd ovs-appctl -t ovn-northd resume
+AT_CHECK([test xfalse = x`as northd ovs-appctl -t ovn-northd is-paused`])
+
+OVS_WAIT_UNTIL([
+ ovn-sbctl lflow-list sw0
+ test 0 = $?])
+
+AT_CLEANUP