[ovs-dev,2/2] fail-open: Add new fail safe mode 'standalone-loose'

Message ID 1522698460-83538-1-git-send-email-yihung.wei@gmail.com
State New
Headers show
Series
  • [ovs-dev,1/2] fail-open: Refactor NORMAL flow add/del
Related show

Commit Message

Yi-Hung Wei April 2, 2018, 7:47 p.m.
Currently, the standalone fail safe mode will add a NORMAL flow when the
controller is disconnected for a predefined period, and will remove the
NORMAL flow right away once the controller is reconnected.  Thus, user
may observe short disconnectivity from the time that the NORMAL flow is
removed till the controller write back the flows.

To avoid the disconnectivity during the timespan, this patch introduces
a new fail safe mode, standalone-loose, which behaves similarly as the
standalone mode, but it keeps the NORMAL flow when the controller is
reconnected.  It let the controller to decide whether to remote the
NORMAL flow later on.

Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com>
---
 ofproto/connmgr.c                                  | 41 +++++++++++++---------
 ofproto/fail-open.c                                | 32 ++++++++++++-----
 ofproto/fail-open.h                                |  5 ++-
 ofproto/ofproto.h                                  |  8 +++--
 tests/bridge.at                                    | 40 +++++++++++++++++++++
 utilities/ovs-vsctl.8.in                           | 10 +++---
 utilities/ovs-vsctl.c                              |  6 ++--
 vswitchd/bridge.c                                  | 12 ++++---
 vswitchd/vswitch.ovsschema                         |  7 ++--
 vswitchd/vswitch.xml                               |  4 +++
 .../etc_xapi.d_plugins_openvswitch-cfg-update      |  4 +--
 ...ensource_libexec_InterfaceReconfigureVswitch.py |  2 +-
 .../usr_share_openvswitch_scripts_ovs-xapi-sync    |  2 +-
 13 files changed, 129 insertions(+), 44 deletions(-)

Patch

diff --git a/ofproto/connmgr.c b/ofproto/connmgr.c
index 964b8c8d85bb..77856a4e2c38 100644
--- a/ofproto/connmgr.c
+++ b/ofproto/connmgr.c
@@ -477,7 +477,8 @@  static void add_controller(struct connmgr *, const char *target, uint8_t dscp,
     OVS_REQUIRES(ofproto_mutex);
 static struct ofconn *find_controller_by_target(struct connmgr *,
                                                 const char *target);
-static void update_fail_open(struct connmgr *) OVS_EXCLUDED(ofproto_mutex);
+static void update_fail_open(struct connmgr *, enum ofproto_fail_mode)
+    OVS_EXCLUDED(ofproto_mutex);
 static int set_pvconns(struct pvconn ***pvconnsp, size_t *n_pvconnsp,
                        const struct sset *);
 
@@ -677,7 +678,7 @@  connmgr_set_controllers(struct connmgr *mgr,
     ovs_mutex_unlock(&ofproto_mutex);
 
     update_in_band_remotes(mgr);
-    update_fail_open(mgr);
+    update_fail_open(mgr, mgr->fail_mode);
     if (had_controllers != connmgr_has_controllers(mgr)) {
         ofproto_flush_flows(mgr->ofproto);
     }
@@ -805,13 +806,18 @@  update_in_band_remotes(struct connmgr *mgr)
 }
 
 static void
-update_fail_open(struct connmgr *mgr)
+update_fail_open(struct connmgr *mgr, enum ofproto_fail_mode fail_mode)
     OVS_EXCLUDED(ofproto_mutex)
 {
     if (connmgr_has_controllers(mgr)
-        && mgr->fail_mode == OFPROTO_FAIL_STANDALONE) {
+        && (fail_mode == OFPROTO_FAIL_STANDALONE
+            || fail_mode == OFPROTO_FAIL_STANDALONE_LOOSE)) {
         if (!mgr->fail_open) {
             mgr->fail_open = fail_open_create(mgr->ofproto, mgr);
+        } else if (fail_open_is_active(mgr->fail_open)) {
+            ovs_mutex_lock(&ofproto_mutex);
+            fail_open_update(mgr->fail_open, fail_mode);
+            ovs_mutex_unlock(&ofproto_mutex);
         }
     } else {
         ovs_mutex_lock(&ofproto_mutex);
@@ -819,6 +825,7 @@  update_fail_open(struct connmgr *mgr)
         ovs_mutex_unlock(&ofproto_mutex);
         mgr->fail_open = NULL;
     }
+    mgr->fail_mode = fail_mode;
 }
 
 static int
@@ -1771,22 +1778,22 @@  do_send_packet_ins(struct ofconn *ofconn, struct ovs_list *txq)
 
 /* Fail-open settings. */
 
-/* Returns the failure handling mode (OFPROTO_FAIL_SECURE or
- * OFPROTO_FAIL_STANDALONE) for 'mgr'. */
+/* Returns the failure handling mode (OFPROTO_FAIL_SECURE,
+ * OFPROTO_FAIL_STANDALONE, or OFPROTO_FAIL_STANDALONE_LOOSE) for 'mgr'. */
 enum ofproto_fail_mode
 connmgr_get_fail_mode(const struct connmgr *mgr)
 {
     return mgr->fail_mode;
 }
 
-/* Sets the failure handling mode for 'mgr' to 'fail_mode' (either
- * OFPROTO_FAIL_SECURE or OFPROTO_FAIL_STANDALONE). */
+/* Sets the failure handling mode for 'mgr' to 'fail_mode' (one of
+ * OFPROTO_FAIL_SECURE, OFPROTO_FAIL_STANDALONE,
+ * OFPROTO_FAIL_STANDALONE_LOOSE). */
 void
 connmgr_set_fail_mode(struct connmgr *mgr, enum ofproto_fail_mode fail_mode)
 {
     if (mgr->fail_mode != fail_mode) {
-        mgr->fail_mode = fail_mode;
-        update_fail_open(mgr);
+        update_fail_open(mgr, fail_mode);
         if (!connmgr_has_controllers(mgr)) {
             ofproto_flush_flows(mgr->ofproto);
         }
@@ -1939,15 +1946,17 @@  connmgr_flushed(struct connmgr *mgr)
     OVS_EXCLUDED(ofproto_mutex)
 {
     if (mgr->fail_open) {
-        fail_open_flushed(mgr->fail_open);
+        fail_open_flushed(mgr->fail_open, mgr->fail_mode);
     }
 
-    /* If there are no controllers and we're in standalone mode, set up a flow
-     * that matches every packet and directs them to OFPP_NORMAL (which goes to
-     * us).  Otherwise, the switch is in secure mode and we won't pass any
-     * traffic until a controller has been defined and it tells us to do so. */
+    /* If there are no controllers and we're in standalone or standalone-loose
+     * mode, set up a flow that matches every packet and directs them to
+     * OFPP_NORMAL (which goes to us).  Otherwise, the switch is in secure mode
+     * and we won't pass any traffic until a controller has been defined and it
+     * tells us to do so. */
     if (!connmgr_has_controllers(mgr)
-        && mgr->fail_mode == OFPROTO_FAIL_STANDALONE) {
+        && (mgr->fail_mode == OFPROTO_FAIL_STANDALONE
+            || mgr->fail_mode == OFPROTO_FAIL_STANDALONE_LOOSE)) {
         struct ofpbuf ofpacts;
         struct match match;
 
diff --git a/ofproto/fail-open.c b/ofproto/fail-open.c
index ded282895a10..d79e325ccb2d 100644
--- a/ofproto/fail-open.c
+++ b/ofproto/fail-open.c
@@ -146,20 +146,24 @@  send_bogus_packet_ins(struct fail_open *fo)
 }
 
 static void
-fail_open_del_normal_flow(struct fail_open *fo)
+fail_open_del_normal_flow(struct fail_open *fo, enum ofproto_fail_mode mode)
     OVS_REQUIRES(ofproto_mutex)
 {
     struct match match;
+    int priority = mode ==  OFPROTO_FAIL_STANDALONE ?
+        FAIL_OPEN_PRIORITY : FAIL_OPEN_PRIORITY_STANDALONE_LOOSE;
 
     match_init_catchall(&match);
-    ofproto_delete_flow(fo->ofproto, &match, FAIL_OPEN_PRIORITY);
+    ofproto_delete_flow(fo->ofproto, &match, priority);
 }
 
 static void
-fail_open_add_normal_flow(struct fail_open *fo)
+fail_open_add_normal_flow(struct fail_open *fo, enum ofproto_fail_mode mode)
 {
     struct ofpbuf ofpacts;
     struct match match;
+    int priority = mode ==  OFPROTO_FAIL_STANDALONE ?
+        FAIL_OPEN_PRIORITY : FAIL_OPEN_PRIORITY_STANDALONE_LOOSE;
 
     /* Set up a flow that matches every packet and directs them to
      * OFPP_NORMAL. */
@@ -167,8 +171,8 @@  fail_open_add_normal_flow(struct fail_open *fo)
     ofpact_put_OUTPUT(&ofpacts)->port = OFPP_NORMAL;
 
     match_init_catchall(&match);
-    ofproto_add_flow(fo->ofproto, &match, FAIL_OPEN_PRIORITY,
-            ofpacts.data, ofpacts.size);
+    ofproto_add_flow(fo->ofproto, &match, priority, ofpacts.data,
+                     ofpacts.size);
 
     ofpbuf_uninit(&ofpacts);
 }
@@ -237,7 +241,9 @@  fail_open_recover(struct fail_open *fo)
     fo->last_disconn_secs = 0;
     fo->next_bogus_packet_in = LLONG_MAX;
 
-    fail_open_del_normal_flow(fo);
+   if (connmgr_get_fail_mode(fo->connmgr) == OFPROTO_FAIL_STANDALONE) {
+        fail_open_del_normal_flow(fo, OFPROTO_FAIL_STANDALONE);
+   }
 }
 
 void
@@ -249,13 +255,13 @@  fail_open_wait(struct fail_open *fo)
 }
 
 void
-fail_open_flushed(struct fail_open *fo)
+fail_open_flushed(struct fail_open *fo, enum ofproto_fail_mode mode)
     OVS_EXCLUDED(ofproto_mutex)
 {
     int disconn_secs = connmgr_failure_duration(fo->connmgr);
     bool open = disconn_secs >= trigger_duration(fo);
     if (open) {
-        fail_open_add_normal_flow(fo);
+        fail_open_add_normal_flow(fo, mode);
     }
     fo->fail_open_active = open;
 }
@@ -296,3 +302,13 @@  fail_open_destroy(struct fail_open *fo)
         free(fo);
     }
 }
+
+void
+fail_open_update(struct fail_open *fo, enum ofproto_fail_mode fail_mode)
+    OVS_REQUIRES(ofproto_mutex)
+{
+    if (fo) {
+        fail_open_del_normal_flow(fo, connmgr_get_fail_mode(fo->connmgr));
+        fail_open_add_normal_flow(fo, fail_mode);
+    }
+}
diff --git a/ofproto/fail-open.h b/ofproto/fail-open.h
index 71442ffd6d4a..7745ab3675e5 100644
--- a/ofproto/fail-open.h
+++ b/ofproto/fail-open.h
@@ -30,6 +30,7 @@  struct ofproto;
  * (OpenFlow priorities max out at 65535 and nothing else in Open vSwitch
  * creates flows with this priority).  And "f0" is mnemonic for "fail open"! */
 #define FAIL_OPEN_PRIORITY 0xf0f0f0
+#define FAIL_OPEN_PRIORITY_STANDALONE_LOOSE 32768
 
 /* Returns true if 'rule' is one created by the "fail open" logic, false
  * otherwise. */
@@ -45,7 +46,9 @@  void fail_open_wait(struct fail_open *);
 bool fail_open_is_active(const struct fail_open *);
 void fail_open_run(struct fail_open *);
 void fail_open_maybe_recover(struct fail_open *) OVS_EXCLUDED(ofproto_mutex);
-void fail_open_flushed(struct fail_open *) OVS_EXCLUDED(ofproto_mutex);
+void fail_open_flushed(struct fail_open *, enum ofproto_fail_mode)
+    OVS_EXCLUDED(ofproto_mutex);
+void fail_open_update(struct fail_open *, enum ofproto_fail_mode);
 
 int fail_open_count_rules(const struct fail_open *);
 
diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h
index 8c85bbf7fb26..3f84d921a327 100644
--- a/ofproto/ofproto.h
+++ b/ofproto/ofproto.h
@@ -198,8 +198,12 @@  struct ofproto_mcast_snooping_port_settings {
 
 /* How the switch should act if the controller cannot be contacted. */
 enum ofproto_fail_mode {
-    OFPROTO_FAIL_SECURE,        /* Preserve flow table. */
-    OFPROTO_FAIL_STANDALONE     /* Act as a standalone switch. */
+    OFPROTO_FAIL_SECURE,                /* Preserve flow table. */
+    OFPROTO_FAIL_STANDALONE,            /* Act as a standalone switch. */
+    OFPROTO_FAIL_STANDALONE_LOOSE       /* Act as a standalone switch as the
+                                         * standalone mode, but leave the
+                                         * NORMAL flow when the controller
+                                         * is reconnected. */
 };
 
 enum ofproto_band {
diff --git a/tests/bridge.at b/tests/bridge.at
index 8b4e4d71d181..d916ab67540b 100644
--- a/tests/bridge.at
+++ b/tests/bridge.at
@@ -79,3 +79,43 @@  AT_CHECK([ovs-vsctl --columns=status list controller | dnl
 OVS_APP_EXIT_AND_WAIT([ovs-vswitchd])
 OVS_APP_EXIT_AND_WAIT([ovsdb-server])
 AT_CLEANUP
+
+AT_SETUP([bridge - standalone-loose fail safe mode])
+OVS_VSWITCHD_START
+
+dnl Add the controller br0
+AT_CHECK([ovs-vsctl set-controller br0 unix:controller])
+AT_CHECK([ovs-vsctl set-fail-mode br0 standalone-loose])
+
+dnl Enable fail-safe mode
+AT_CHECK([ovs-appctl time/warp 15000], [0], [ignore])
+AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip], [0], [dnl
+NXST_FLOW reply:
+ actions=NORMAL
+])
+
+dnl Start ovs-testcontroller, the NORMAL flow should be kept there
+AT_CHECK([ovs-testcontroller --detach punix:controller --pidfile], [0], [ignore])
+on_exit 'kill `cat ovs-testcontroller.pid`'
+OVS_WAIT_UNTIL([test -e controller])
+
+dnl Wait for the controller connectionsi to be up
+for i in `seq 0 19`
+do
+    if ovs-vsctl --columns=is_connected list controller |grep "false"; then
+        :
+    else
+        break
+    fi
+    ovs-appctl time/warp 1100
+done
+
+AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip], [0], [dnl
+NXST_FLOW reply:
+ actions=NORMAL
+ priority=0 actions=CONTROLLER:128
+])
+
+OVS_APP_EXIT_AND_WAIT([ovs-vswitchd])
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+AT_CLEANUP
diff --git a/utilities/ovs-vsctl.8.in b/utilities/ovs-vsctl.8.in
index b18782c31d55..8b34e1320002 100644
--- a/utilities/ovs-vsctl.8.in
+++ b/utilities/ovs-vsctl.8.in
@@ -397,15 +397,17 @@  the controller fails, no new network connections can be set up.  If
 the connection to the controller stays down long enough, no packets
 can pass through the switch at all.
 .PP
-If the value is \fBstandalone\fR, or if neither of these settings
-is set, \fBovs\-vswitchd\fR will take over
+If the value is \fBstandalone\fR or \fBstandalone-loose\fR, or if neither of
+these settings is set, \fBovs\-vswitchd\fR will take over
 responsibility for setting up
 flows when no message has been received from the controller for three
 times the inactivity probe interval.  In this mode,
 \fBovs\-vswitchd\fR causes the datapath to act like an ordinary
 MAC-learning switch.  \fBovs\-vswitchd\fR will continue to retry connecting
 to the controller in the background and, when the connection succeeds,
-it discontinues its standalone behavior.
+it discontinues its standalone behavior.  Once the controller is reconnected,
+\fBstandalone\fB mode removes the setup flows right away, while the
+\fBstadalone-loose\fB mode keeps the flows.
 .PP
 If this option is set to \fBsecure\fR, \fBovs\-vswitchd\fR will not
 set up flows on its own when the controller connection fails.
@@ -416,7 +418,7 @@  Prints the configured failure mode.
 .IP "\fBdel\-fail\-mode\fR \fIbridge\fR"
 Deletes the configured failure mode.
 .
-.IP "\fBset\-fail\-mode\fR \fIbridge\fR \fBstandalone\fR|\fBsecure\fR"
+.IP "\fBset\-fail\-mode\fR \fIbridge\fR \fBstandalone\fR|\fBstandalone-loose\fR|\fBsecure\fR"
 Sets the configured failure mode.
 .
 .SS "Manager Connectivity"
diff --git a/utilities/ovs-vsctl.c b/utilities/ovs-vsctl.c
index c69e89e98418..1fab74f00755 100644
--- a/utilities/ovs-vsctl.c
+++ b/utilities/ovs-vsctl.c
@@ -2007,8 +2007,10 @@  cmd_set_fail_mode(struct ctl_context *ctx)
 
     br = find_real_bridge(vsctl_ctx, ctx->argv[1], true);
 
-    if (strcmp(fail_mode, "standalone") && strcmp(fail_mode, "secure")) {
-        ctl_fatal("fail-mode must be \"standalone\" or \"secure\"");
+    if (strcmp(fail_mode, "standalone") && strcmp(fail_mode, "secure") &&
+        strcmp(fail_mode, "standalone-loose")) {
+        ctl_fatal("fail-mode must be \"standalone\", \"standalone-loose\" or "
+                  "\"secure\"");
     }
 
     ovsrec_bridge_set_fail_mode(br->br_cfg, fail_mode);
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index d90997e3a332..50df01d21216 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -3673,10 +3673,14 @@  bridge_configure_remotes(struct bridge *br,
     free(ocs);
 
     /* Set the fail-mode. */
-    fail_mode = !br->cfg->fail_mode
-                || !strcmp(br->cfg->fail_mode, "standalone")
-                    ? OFPROTO_FAIL_STANDALONE
-                    : OFPROTO_FAIL_SECURE;
+    if (!br->cfg->fail_mode ||
+        !strcmp(br->cfg->fail_mode, "standalone")) {
+        fail_mode = OFPROTO_FAIL_STANDALONE;
+    } else if (!strcmp(br->cfg->fail_mode, "standalone-loose")) {
+        fail_mode = OFPROTO_FAIL_STANDALONE_LOOSE;
+    } else {
+        fail_mode = OFPROTO_FAIL_SECURE;
+    }
     ofproto_set_fail_mode(br->ofproto, fail_mode);
 
     /* Configure OpenFlow controller connection snooping. */
diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema
index 90e50b62612b..39138cdeb070 100644
--- a/vswitchd/vswitch.ovsschema
+++ b/vswitchd/vswitch.ovsschema
@@ -1,6 +1,6 @@ 
 {"name": "Open_vSwitch",
- "version": "7.15.1",
- "cksum": "3682332033 23608",
+ "version": "7.16.0",
+ "cksum": "1548816770 23671",
  "tables": {
    "Open_vSwitch": {
      "columns": {
@@ -104,7 +104,8 @@ 
 	   "min": 0, "max": "unlimited"}},
        "fail_mode": {
          "type": {"key": {"type": "string",
-                          "enum": ["set", ["standalone", "secure"]]},
+                          "enum": ["set", ["standalone", "secure",
+                                           "standalone-loose"]]},
                   "min": 0, "max": 1}},
        "status": {
          "type": {"key": "string", "value": "string",
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index f899a19764a4..8a7f294902c6 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -815,6 +815,10 @@ 
           to retry connecting to the controller in the background
           and, when the connection succeeds, it will discontinue its
           standalone behavior.</dd>
+          <dt><code>standalone-loose</code></dt>
+          <dd>standalone-loose mode behaves similarly as the standalone
+          mode, except that it keeps the setup flows when the controller
+          is reconnected.</dd>
           <dt><code>secure</code></dt>
           <dd>Open vSwitch will not set up flows on its own when the
           controller connection fails or when no controllers are
diff --git a/xenserver/etc_xapi.d_plugins_openvswitch-cfg-update b/xenserver/etc_xapi.d_plugins_openvswitch-cfg-update
index e7404e3b0097..494c3858af81 100755
--- a/xenserver/etc_xapi.d_plugins_openvswitch-cfg-update
+++ b/xenserver/etc_xapi.d_plugins_openvswitch-cfg-update
@@ -150,10 +150,10 @@  def update(session, args):
         except KeyError, e:
             fail_mode = None
 
-        if fail_mode not in ['secure', 'standalone']:
+        if fail_mode not in ['secure', 'standalone', 'standalone-loose']:
             fail_mode = pool_fail_mode
 
-        if fail_mode != 'secure':
+        if fail_mode != 'secure' or fail_mode != 'standalone-loose':
             fail_mode = 'standalone'
 
         if bridge_fail_mode != fail_mode:
diff --git a/xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py b/xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py
index 53468b7064e5..3c7eb9f13578 100644
--- a/xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py
+++ b/xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py
@@ -400,7 +400,7 @@  def configure_datapath(pif):
     network = db().get_network_by_bridge(bridge)
     network_rec = None
     fail_mode = None
-    valid_fail_modes = ['standalone', 'secure']
+    valid_fail_modes = ['standalone', 'secure', 'standalone-loose']
 
     if network:
         network_rec = db().get_network_record(network)
diff --git a/xenserver/usr_share_openvswitch_scripts_ovs-xapi-sync b/xenserver/usr_share_openvswitch_scripts_ovs-xapi-sync
index ecd6f6d705c1..e79a422e1d74 100755
--- a/xenserver/usr_share_openvswitch_scripts_ovs-xapi-sync
+++ b/xenserver/usr_share_openvswitch_scripts_ovs-xapi-sync
@@ -201,7 +201,7 @@  def update_fail_mode(row):
             fail_mode = prec['other_config'].get(
                     'vswitch-controller-fail-mode')
 
-    if fail_mode not in ['standalone', 'secure']:
+    if fail_mode not in ['standalone', 'secure', 'standalone-loose']:
         fail_mode = 'standalone'
 
     row.verify("fail_mode")