[ovs-dev,v7,1/2] Avoid packet drop on LACP bond after link up
diff mbox series

Message ID 1560118744-28169-1-git-send-email-nitin.katiyar@ericsson.com
State Accepted
Headers show
Series
  • [ovs-dev,v7,1/2] Avoid packet drop on LACP bond after link up
Related show

Commit Message

Nitin Katiyar June 9, 2019, 2:17 p.m. UTC
Problem:
========
The OVS state machine that enables and disables bond slaves runs in 
the OVS main thread. The OVS code that processes received LACP packets
runs in a different thread. Until now, when the latter processes a LACP
PDU that should enable a slave, the slave was only enabled when the
main thread was able to run the state machine. In some cases this led
to delays of up to 350ms when the main thread was busy or not scheduled,
which led to corresponding delays in which packets were dropped due to
the bond-admissibility check.

Fix:
====
When a LACP PDU is received, evaluate whether LACP slave can be enabled
(slave_may_enable()) and set LACP slave's may_enable from the datapath
thread itself. When may_enable = TRUE, it means L1 state is UP and
LACP-SYNC is done and it is waiting for the main thread to enable the
slave. Relax the check in bond_check_admissibility() to check for both
"enable" and "may_enable" of the LACP slave. This would avoid dropping
of packets until the main thread enables the slave from bundle_run().

Signed-off-by: Manohar Krishnappa Chidambaraswamy <manukc@gmail.com>
Co-authored-by: Manohar Krishnappa Chidambaraswamy <manukc@gmail.com>
Signed-off-by: Nitin Katiyar <nitin.katiyar@ericsson.com>
---
 lib/lacp.c                   | 10 +++++++++-
 lib/lacp.h                   |  2 +-
 ofproto/bond.c               | 22 +++++++++++++++++++---
 ofproto/ofproto-dpif-xlate.c | 10 +++++++++-
 4 files changed, 38 insertions(+), 6 deletions(-)

Comments

Ben Pfaff June 10, 2019, 4:13 p.m. UTC | #1
On Sun, Jun 09, 2019 at 02:17:45PM +0000, Nitin Katiyar wrote:
> Problem:
> ========
> The OVS state machine that enables and disables bond slaves runs in 
> the OVS main thread. The OVS code that processes received LACP packets
> runs in a different thread. Until now, when the latter processes a LACP
> PDU that should enable a slave, the slave was only enabled when the
> main thread was able to run the state machine. In some cases this led
> to delays of up to 350ms when the main thread was busy or not scheduled,
> which led to corresponding delays in which packets were dropped due to
> the bond-admissibility check.

Thanks, applied to master.

Patch
diff mbox series

diff --git a/lib/lacp.c b/lib/lacp.c
index d6b36aa..e768012 100644
--- a/lib/lacp.c
+++ b/lib/lacp.c
@@ -154,6 +154,7 @@  static struct slave *slave_lookup(const struct lacp *, const void *slave)
     OVS_REQUIRES(mutex);
 static bool info_tx_equal(struct lacp_info *, struct lacp_info *)
     OVS_REQUIRES(mutex);
+static bool slave_may_enable__(struct slave *slave) OVS_REQUIRES(mutex);
 
 static unixctl_cb_func lacp_unixctl_show;
 static unixctl_cb_func lacp_unixctl_show_stats;
@@ -324,7 +325,7 @@  lacp_is_active(const struct lacp *lacp) OVS_EXCLUDED(mutex)
 /* Processes 'packet' which was received on 'slave_'.  This function should be
  * called on all packets received on 'slave_' with Ethernet Type ETH_TYPE_LACP.
  */
-void
+bool
 lacp_process_packet(struct lacp *lacp, const void *slave_,
                     const struct dp_packet *packet)
     OVS_EXCLUDED(mutex)
@@ -333,6 +334,7 @@  lacp_process_packet(struct lacp *lacp, const void *slave_,
     const struct lacp_pdu *pdu;
     long long int tx_rate;
     struct slave *slave;
+    bool lacp_may_enable = false;
 
     lacp_lock();
     slave = slave_lookup(lacp, slave_);
@@ -362,8 +364,14 @@  lacp_process_packet(struct lacp *lacp, const void *slave_,
         slave->partner = pdu->actor;
     }
 
+    /* Evaluate may_enable here to avoid dropping of packets till main thread
+     * sets may_enable to true. */
+    lacp_may_enable = slave_may_enable__(slave);
+
 out:
     lacp_unlock();
+
+    return lacp_may_enable;
 }
 
 /* Returns the lacp_status of the given 'lacp' object (which may be NULL). */
diff --git a/lib/lacp.h b/lib/lacp.h
index f35cff5..0dfaef0 100644
--- a/lib/lacp.h
+++ b/lib/lacp.h
@@ -46,7 +46,7 @@  struct lacp *lacp_ref(const struct lacp *);
 void lacp_configure(struct lacp *, const struct lacp_settings *);
 bool lacp_is_active(const struct lacp *);
 
-void lacp_process_packet(struct lacp *, const void *slave,
+bool lacp_process_packet(struct lacp *, const void *slave,
                          const struct dp_packet *packet);
 enum lacp_status lacp_status(const struct lacp *);
 
diff --git a/ofproto/bond.c b/ofproto/bond.c
index d2a8b1f..c5d5f2c 100644
--- a/ofproto/bond.c
+++ b/ofproto/bond.c
@@ -794,6 +794,7 @@  bond_check_admissibility(struct bond *bond, const void *slave_,
 {
     enum bond_verdict verdict = BV_DROP;
     struct bond_slave *slave;
+    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
 
     ovs_rwlock_rdlock(&rwlock);
     slave = bond_slave_lookup(bond, slave_);
@@ -811,7 +812,11 @@  bond_check_admissibility(struct bond *bond, const void *slave_,
      * drop all incoming traffic except if lacp_fallback_ab is enabled. */
     switch (bond->lacp_status) {
     case LACP_NEGOTIATED:
-        verdict = slave->enabled ? BV_ACCEPT : BV_DROP;
+        /* To reduce packet-drops due to delay in enabling of slave (post
+         * LACP-SYNC), from main thread, check for may_enable as well.
+         * When may_enable is TRUE, it means LACP is UP and waiting for the
+         * main thread to run LACP state machine and enable the slave. */
+        verdict = (slave->enabled || slave->may_enable) ? BV_ACCEPT : BV_DROP;
         goto out;
     case LACP_CONFIGURED:
         if (!bond->lacp_fallback_ab) {
@@ -847,8 +852,6 @@  bond_check_admissibility(struct bond *bond, const void *slave_,
         /* Drop all packets which arrive on backup slaves.  This is similar to
          * how Linux bonding handles active-backup bonds. */
         if (bond->active_slave != slave) {
-            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
-
             VLOG_DBG_RL(&rl, "active-backup bond received packet on backup"
                         " slave (%s) destined for " ETH_ADDR_FMT,
                         slave->name, ETH_ADDR_ARGS(eth_dst));
@@ -870,6 +873,19 @@  bond_check_admissibility(struct bond *bond, const void *slave_,
 
     OVS_NOT_REACHED();
 out:
+    if (slave && (verdict != BV_ACCEPT)) {
+        VLOG_DBG_RL(&rl, "slave (%s): Admissibility verdict is to drop pkt %s."
+                    "active slave: %s, may_enable: %s enable: %s "
+                    "LACP status:%d",
+                    slave->name,
+                    (verdict == BV_DROP_IF_MOVED) ?
+                        "as different port is learned" : "",
+                    (bond->active_slave == slave) ? "true" : "false",
+                    slave->may_enable ? "true" : "false",
+                    slave->enabled ? "true" : "false",
+                    bond->lacp_status);
+    }
+
     ovs_rwlock_unlock(&rwlock);
     return verdict;
 
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 5cee37f..14a7237 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -3308,6 +3308,7 @@  process_special(struct xlate_ctx *ctx, const struct xport *xport)
     const struct xbridge *xbridge = ctx->xbridge;
     const struct dp_packet *packet = ctx->xin->packet;
     enum slow_path_reason slow;
+    bool lacp_may_enable;
 
     if (!xport) {
         slow = 0;
@@ -3328,7 +3329,14 @@  process_special(struct xlate_ctx *ctx, const struct xport *xport)
     } else if (xport->xbundle && xport->xbundle->lacp
                && flow->dl_type == htons(ETH_TYPE_LACP)) {
         if (packet) {
-            lacp_process_packet(xport->xbundle->lacp, xport->ofport, packet);
+            lacp_may_enable = lacp_process_packet(xport->xbundle->lacp,
+                                                  xport->ofport, packet);
+            /* Update LACP status in bond-slave to avoid packet-drops until
+             * LACP state machine is run by the main thread. */
+            if (xport->xbundle->bond && lacp_may_enable) {
+                bond_slave_set_may_enable(xport->xbundle->bond, xport->ofport,
+                                          lacp_may_enable);
+            }
         }
         slow = SLOW_LACP;
     } else if ((xbridge->stp || xbridge->rstp) &&