diff mbox series

[ovs-dev,ovn,v3,2/2] northd: Improve handling of pause and resume

Message ID 67a9869331f2702c1167a84d97072e807e4209d3.1574501015.git.frode.nordahl@canonical.com
State Accepted
Commit 75b3e40a55a5afba70dd7e0def14c944fff92df2
Headers show
Series northd: Improve pause, resume and status | expand

Commit Message

Frode Nordahl Nov. 21, 2019, 4:32 p.m. UTC
Move ``had_lock`` and ``paused`` state variables to new
``struct northd_state`` and pass the state struct to command
handlers.

On pause release the OVSDB lock on SB DB.

Re-instate aspiration for lock on resume.

Status command will now provide accurate information for 'active',
'paused', and 'standby' states.

Merge separate status command test into the pause and resume tests.

Signed-off-by: Frode Nordahl <frode.nordahl@canonical.com>
---
 northd/ovn-northd.8.xml |  9 +++--
 northd/ovn-northd.c     | 89 +++++++++++++++++++++++++++--------------
 tests/ovn-northd.at     | 24 ++++++-----
 3 files changed, 80 insertions(+), 42 deletions(-)

Comments

0-day Robot Nov. 23, 2019, 9:56 a.m. UTC | #1
Bleep bloop.  Greetings Frode Nordahl, I am a robot and I have tried out your patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


git-am:
fatal: sha1 information is lacking or useless (tests/ovn-northd.at).
Repository lacks necessary blobs to fall back on 3-way merge.
Cannot fall back to three-way merge.
Patch failed at 0001 northd: Improve handling of pause and resume
The copy of the patch that failed is found in:
   /var/lib/jenkins/jobs/upstream_build_from_pw/workspace/OVN/.git/rebase-apply/patch
When you have resolved this problem, run "git am --resolved".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".


Please check this out.  If you feel there has been an error, please email aconole@redhat.com

Thanks,
0-day Robot
Frode Nordahl Nov. 23, 2019, 10:46 a.m. UTC | #2
As before, I believe the bot is in error here, it appears to apply the
patches in the reverse order. When I look at the mailing list archive,
the patches appear in reverse order there too, so it may be something
about how I submit them.

In any case, the blobs apply just fine, so I would like to request
reviews regardless of the robots remarks :-)
diff mbox series

Patch

diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
index 9734e79e6..c6d5d96b9 100644
--- a/northd/ovn-northd.8.xml
+++ b/northd/ovn-northd.8.xml
@@ -74,13 +74,15 @@ 
       <dt><code>pause</code></dt>
       <dd>
         Pauses the ovn-northd operation from processing any Northbound and
-        Southbound database changes.
+        Southbound database changes.  This will also instruct ovn-northd to
+        drop any lock on SB DB.
       </dd>
 
       <dt><code>resume</code></dt>
       <dd>
         Resumes the ovn-northd operation to process Northbound and
-        Southbound database contents and generate logical flows.
+        Southbound database contents and generate logical flows.  This will
+        also instruct ovn-northd to aspire for the lock on SB DB.
       </dd>
 
       <dt><code>is-paused</code></dt>
@@ -91,7 +93,8 @@ 
       <dt><code>status</code></dt>
       <dd>
         Prints this server's status.  Status will be "active" if ovn-northd has
-        acquired OVSDB lock on NB DB, "standby" otherwise.
+        acquired OVSDB lock on SB DB, "standby" if it has not or "paused" if
+        this instance is paused.
       </dd>
       </dl>
     </p>
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index a943e1037..d58a59ffd 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -67,6 +67,11 @@  struct northd_context {
     struct ovsdb_idl_index *sbrec_ip_mcast_by_dp;
 };
 
+struct northd_state {
+    bool had_lock;
+    bool paused;
+};
+
 /* An IPv4 or IPv6 address */
 struct v46_ip {
     int family;
@@ -10843,8 +10848,7 @@  main(int argc, char *argv[])
     struct unixctl_server *unixctl;
     int retval;
     bool exiting;
-    bool paused;
-    bool had_lock;
+    struct northd_state state;
 
     fatal_ignore_sigpipe();
     ovs_cmdl_proctitle_init(argc, argv);
@@ -10866,11 +10870,11 @@  main(int argc, char *argv[])
         exit(EXIT_FAILURE);
     }
     unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);
-    unixctl_command_register("pause", "", 0, 0, ovn_northd_pause, &paused);
-    unixctl_command_register("resume", "", 0, 0, ovn_northd_resume, &paused);
+    unixctl_command_register("pause", "", 0, 0, ovn_northd_pause, &state);
+    unixctl_command_register("resume", "", 0, 0, ovn_northd_resume, &state);
     unixctl_command_register("is-paused", "", 0, 0, ovn_northd_is_paused,
-                             &paused);
-    unixctl_command_register("status", "", 0, 0, ovn_northd_status, &had_lock);
+                             &state);
+    unixctl_command_register("status", "", 0, 0, ovn_northd_status, &state);
 
     daemonize_complete();
 
@@ -11072,17 +11076,23 @@  main(int argc, char *argv[])
     struct ovsdb_idl_index *sbrec_ip_mcast_by_dp
         = ip_mcast_index_create(ovnsb_idl_loop.idl);
 
-    /* Ensure that only a single ovn-northd is active in the deployment by
-     * acquiring a lock called "ovn_northd" on the southbound database
-     * and then only performing DB transactions if the lock is held. */
-    ovsdb_idl_set_lock(ovnsb_idl_loop.idl, "ovn_northd");
-
     /* Main loop. */
     exiting = false;
-    paused = false;
-    had_lock = false;
+    state.had_lock = false;
+    state.paused = false;
     while (!exiting) {
-        if (!paused) {
+        if (!state.paused) {
+            if (!ovsdb_idl_has_lock(ovnsb_idl_loop.idl) &&
+                !ovsdb_idl_is_lock_contended(ovnsb_idl_loop.idl))
+            {
+                /* Ensure that only a single ovn-northd is active in the
+                 * deployment by acquiring a lock called "ovn_northd" on the
+                 * southbound database and then only performing DB transactions
+                 * if the lock is held.
+                 */
+                ovsdb_idl_set_lock(ovnsb_idl_loop.idl, "ovn_northd");
+            }
+
             struct northd_context ctx = {
                 .ovnnb_idl = ovnnb_idl_loop.idl,
                 .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
@@ -11093,14 +11103,16 @@  main(int argc, char *argv[])
                 .sbrec_ip_mcast_by_dp = sbrec_ip_mcast_by_dp,
             };
 
-            if (!had_lock && ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
+            if (!state.had_lock && ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
                 VLOG_INFO("ovn-northd lock acquired. "
                         "This ovn-northd instance is now active.");
-                had_lock = true;
-            } else if (had_lock && !ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
+                state.had_lock = true;
+            } else if (state.had_lock &&
+                       !ovsdb_idl_has_lock(ovnsb_idl_loop.idl))
+            {
                 VLOG_INFO("ovn-northd lock lost. "
                         "This ovn-northd instance is now on standby.");
-                had_lock = false;
+                state.had_lock = false;
             }
 
             if (ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
@@ -11121,6 +11133,15 @@  main(int argc, char *argv[])
              *      copy will be out of sync.
              *    - but we don't want to create any txns.
              * */
+            if (ovsdb_idl_has_lock(ovnsb_idl_loop.idl) ||
+                ovsdb_idl_is_lock_contended(ovnsb_idl_loop.idl))
+            {
+                /* make sure we don't hold the lock while paused */
+                VLOG_INFO("This ovn-northd instance is now paused.");
+                ovsdb_idl_set_lock(ovnsb_idl_loop.idl, NULL);
+                state.had_lock = false;
+            }
+
             ovsdb_idl_run(ovnnb_idl_loop.idl);
             ovsdb_idl_run(ovnsb_idl_loop.idl);
             ovsdb_idl_wait(ovnnb_idl_loop.idl);
@@ -11159,30 +11180,30 @@  ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
 
 static void
 ovn_northd_pause(struct unixctl_conn *conn, int argc OVS_UNUSED,
-                const char *argv[] OVS_UNUSED, void *pause_)
+                const char *argv[] OVS_UNUSED, void *state_)
 {
-    bool *pause = pause_;
-    *pause = true;
+    struct northd_state  *state = state_;
+    state->paused = true;
 
     unixctl_command_reply(conn, NULL);
 }
 
 static void
 ovn_northd_resume(struct unixctl_conn *conn, int argc OVS_UNUSED,
-                  const char *argv[] OVS_UNUSED, void *pause_)
+                  const char *argv[] OVS_UNUSED, void *state_)
 {
-    bool *pause = pause_;
-    *pause = false;
+    struct northd_state *state = state_;
+    state->paused = false;
 
     unixctl_command_reply(conn, NULL);
 }
 
 static void
 ovn_northd_is_paused(struct unixctl_conn *conn, int argc OVS_UNUSED,
-                     const char *argv[] OVS_UNUSED, void *paused_)
+                     const char *argv[] OVS_UNUSED, void *state_)
 {
-    bool *paused = paused_;
-    if (*paused) {
+    struct northd_state *state = state_;
+    if (state->paused) {
         unixctl_command_reply(conn, "true");
     } else {
         unixctl_command_reply(conn, "false");
@@ -11191,15 +11212,23 @@  ovn_northd_is_paused(struct unixctl_conn *conn, int argc OVS_UNUSED,
 
 static void
 ovn_northd_status(struct unixctl_conn *conn, int argc OVS_UNUSED,
-                  const char *argv[] OVS_UNUSED, void *had_lock_)
+                  const char *argv[] OVS_UNUSED, void *state_)
 {
-    bool *had_lock = had_lock_;
+    struct northd_state *state = state_;
+    char *status;
+
+    if (state->paused) {
+        status = "paused";
+    } else {
+        status = state->had_lock ? "active" : "standby";
+    }
+
     /*
      * Use a labelled formatted output so we can add more to the status command
      * later without breaking any consuming scripts
      */
     struct ds s = DS_EMPTY_INITIALIZER;
-    ds_put_format(&s, "Status: %s\n", *had_lock ? "active" : "standby");
+    ds_put_format(&s, "Status: %s\n", status);
     unixctl_command_reply(conn, ds_cstr(&s));
     ds_destroy(&s);
 }
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 1c94f2f65..d73a22f68 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -899,22 +899,18 @@  OVS_APP_EXIT_AND_WAIT([ovn-northd])
 
 AT_CLEANUP
 
-AT_SETUP([ovn -- ovn-northd status])
-AT_SKIP_IF([test $HAVE_PYTHON = no])
-ovn_start
-
-AT_CHECK([as northd ovn-appctl -t ovn-northd status], [0], [Status: active
-])
-
-AT_CLEANUP
-
 AT_SETUP([ovn -- ovn-northd pause and resume])
 AT_SKIP_IF([test $HAVE_PYTHON = no])
 ovn_start
 
 AT_CHECK([test xfalse = x`as northd ovn-appctl -t ovn-northd is-paused`])
+AT_CHECK([as northd ovn-appctl -t ovn-northd status], [0], [Status: active
+])
 AT_CHECK([test xfalse = x`as northd-backup ovn-appctl -t ovn-northd \
 is-paused`])
+AT_CHECK([as northd-backup ovn-appctl -t ovn-northd status], [0],
+[Status: standby
+])
 
 ovn-nbctl ls-add sw0
 
@@ -931,7 +927,12 @@  OVS_WAIT_UNTIL([
 as northd ovs-appctl -t ovn-northd pause
 as northd-backup ovs-appctl -t ovn-northd pause
 AT_CHECK([test xtrue = x`as northd ovn-appctl -t ovn-northd is-paused`])
+AT_CHECK([as northd ovn-appctl -t ovn-northd status], [0], [Status: paused
+])
 AT_CHECK([test xtrue = x`as northd-backup ovn-appctl -t ovn-northd is-paused`])
+AT_CHECK([as northd-backup ovn-appctl -t ovn-northd status], [0],
+[Status: paused
+])
 
 ovn-nbctl ls-add sw0
 
@@ -944,8 +945,13 @@  OVS_WAIT_UNTIL([
 as northd ovs-appctl -t ovn-northd resume
 as northd-backup ovs-appctl -t ovn-northd resume
 AT_CHECK([test xfalse = x`as northd ovn-appctl -t ovn-northd is-paused`])
+AT_CHECK([as northd ovn-appctl -t ovn-northd status], [0], [Status: active
+])
 AT_CHECK([test xfalse = x`as northd-backup ovn-appctl -t ovn-northd \
 is-paused`])
+AT_CHECK([as northd-backup ovn-appctl -t ovn-northd status], [0],
+[Status: standby
+])
 
 OVS_WAIT_UNTIL([
     ovn-sbctl lflow-list sw0