[ovs-dev,2/2] Avoid packet drop on LACP bond after link up

Message ID 1551285539-31225-2-git-send-email-nitin.katiyar@ericsson.com
State New
Headers show
Series
  • [ovs-dev,1/2] Avoid packet drop on LACP bond after link up
Related show

Commit Message

Nitin Katiyar Feb. 27, 2019, 8:37 a.m.
Problem:

Patch

========
On certain Fortville NICs it has been observed that PHY UP detection can
get delayed (sometimes up to 4-5 secs). When the driver fails to fetch
PHY status as UP even though its actually UP,  LACP packets can get
exchanged and LACP slave brought UP. In such a case, the remote end
would start sending traffic on that slave, but OVS drops it, as the
bond-slave is not yet enabled due to PHY DOWN.

Fix:
====
The main intention here is delay LACP negotiation until carrier (PHY)
status is read as UP.

1. In port_run()/bundle_run(), cache the carrier status in
  "struct ofport_dpif"/"struct bond_slave".

2. When carrier state is DOWN, do not send any LACPDUs and
  drop any received LACPDUs.

3. When LACP state changes or LACPDU is received, trigger re-checking
  of carrier-state (in port_run()) by incrementing the connectivity
  sequence number to find out the true carrier state as fast as
  possible.

Signed-off-by: Nitin Katiyar <nitin.katiyar@ericsson.com>
Signed-off-by: Manohar Krishnappa Chidambaraswamy <manukc@gmail.com>
Co-authored-by: Manohar Krishnappa Chidambaraswamy <manukc@gmail.com>
---
 lib/lacp.c             | 40 +++++++++++++++++++++++++++++++++-------
 lib/lacp.h             |  4 ++--
 ofproto/bond.c         | 30 ++++++++++++++++++++++++++++++
 ofproto/bond.h         |  3 +++
 ofproto/ofproto-dpif.c | 10 +++++++---
 tests/ofproto-dpif.at  |  3 +++
 6 files changed, 78 insertions(+), 12 deletions(-)

diff --git a/lib/lacp.c b/lib/lacp.c
index 7ac85f9..359b6ce 100644
--- a/lib/lacp.c
+++ b/lib/lacp.c
@@ -33,6 +33,7 @@ 
 #include "unixctl.h"
 #include "openvswitch/vlog.h"
 #include "util.h"
+#include "ofproto/bond.h"
 
 VLOG_DEFINE_THIS_MODULE(lacp);
 
@@ -148,7 +149,7 @@  static void slave_get_actor(struct slave *, struct lacp_info *actor)
     OVS_REQUIRES(mutex);
 static void slave_get_priority(struct slave *, struct lacp_info *priority)
     OVS_REQUIRES(mutex);
-static bool slave_may_tx(const struct slave *)
+static bool slave_may_tx(const void *bond, const struct slave *)
     OVS_REQUIRES(mutex);
 static struct slave *slave_lookup(const struct lacp *, const void *slave)
     OVS_REQUIRES(mutex);
@@ -334,6 +335,7 @@  lacp_process_packet(struct lacp *lacp, const void *bond, const void *slave_,
     const struct lacp_pdu *pdu;
     long long int tx_rate;
     struct slave *slave;
+    bool carrier_up = false;
     bool lacp_may_enable = false;
 
     lacp_lock();
@@ -350,6 +352,21 @@  lacp_process_packet(struct lacp *lacp, const void *bond, const void *slave_,
         goto out;
     }
 
+    /*
+     * On some NICs L1 state reporting is slow. In case LACP packets are
+     * received while carrier (L1) state is still down, drop the LACPDU and
+     * trigger re-checking of L1 state.
+     */
+    carrier_up = bond_slave_get_carrier(bond, slave_);
+    if (!carrier_up) {
+        seq_change(connectivity_seq_get());
+
+        VLOG_INFO_RL(&rl, "carrier still DOWN - conn seq changed for %s, "
+                  "dropping packet\n", slave->name);
+
+        goto out;
+    }
+
     slave->status = LACP_CURRENT;
     tx_rate = lacp->fast ? LACP_FAST_TIME_TX : LACP_SLOW_TIME_TX;
     timer_set_duration(&slave->rx, LACP_RX_MULTIPLIER * tx_rate);
@@ -529,7 +546,8 @@  lacp_slave_is_current(const struct lacp *lacp, const void *slave_)
 
 /* This function should be called periodically to update 'lacp'. */
 void
-lacp_run(struct lacp *lacp, lacp_send_pdu *send_pdu) OVS_EXCLUDED(mutex)
+lacp_run(struct lacp *lacp, const void *bond,
+         lacp_send_pdu *send_pdu) OVS_EXCLUDED(mutex)
 {
     struct slave *slave;
 
@@ -559,7 +577,7 @@  lacp_run(struct lacp *lacp, lacp_send_pdu *send_pdu) OVS_EXCLUDED(mutex)
     HMAP_FOR_EACH (slave, node, &lacp->slaves) {
         struct lacp_info actor;
 
-        if (!slave_may_tx(slave)) {
+        if (!slave_may_tx(bond, slave)) {
             continue;
         }
 
@@ -588,13 +606,13 @@  lacp_run(struct lacp *lacp, lacp_send_pdu *send_pdu) OVS_EXCLUDED(mutex)
 
 /* Causes poll_block() to wake up when lacp_run() needs to be called again. */
 void
-lacp_wait(struct lacp *lacp) OVS_EXCLUDED(mutex)
+lacp_wait(struct lacp *lacp, const void *bond) OVS_EXCLUDED(mutex)
 {
     struct slave *slave;
 
     lacp_lock();
     HMAP_FOR_EACH (slave, node, &lacp->slaves) {
-        if (slave_may_tx(slave)) {
+        if (slave_may_tx(bond, slave)) {
             timer_wait(&slave->tx);
         }
 
@@ -818,9 +836,17 @@  slave_get_priority(struct slave *slave, struct lacp_info *priority)
 }
 
 static bool
-slave_may_tx(const struct slave *slave) OVS_REQUIRES(mutex)
+slave_may_tx(const void *bond, const struct slave *slave)
+    OVS_REQUIRES(mutex)
 {
-    return slave->lacp->active || slave->status != LACP_DEFAULTED;
+    bool carrier_up = true;
+
+    if (bond) {
+        carrier_up = bond_slave_get_carrier(bond, slave->aux);
+    }
+
+    return carrier_up &&
+        (slave->lacp->active || slave->status != LACP_DEFAULTED);
 }
 
 static struct slave *
diff --git a/lib/lacp.h b/lib/lacp.h
index 1505c2c..9a302c9 100644
--- a/lib/lacp.h
+++ b/lib/lacp.h
@@ -68,8 +68,8 @@  bool lacp_slave_is_current(const struct lacp *, const void *slave_);
 /* Callback function for lacp_run() for sending a LACP PDU. */
 typedef void lacp_send_pdu(void *slave, const void *pdu, size_t pdu_size);
 
-void lacp_run(struct lacp *, lacp_send_pdu *);
-void lacp_wait(struct lacp *);
+void lacp_run(struct lacp *, const void *, lacp_send_pdu *);
+void lacp_wait(struct lacp *, const void *);
 
 struct lacp_slave_stats {
     /* id */
diff --git a/ofproto/bond.c b/ofproto/bond.c
index cd6fd33..68c3acb 100644
--- a/ofproto/bond.c
+++ b/ofproto/bond.c
@@ -93,6 +93,7 @@  struct bond_slave {
     /* Link status. */
     bool enabled;               /* May be chosen for flows? */
     bool may_enable;            /* Client considers this slave bondable. */
+    bool carrier_up;            /* Carrier state from NIC port */
     long long delay_expires;    /* Time after which 'enabled' may change. */
 
     /* Rebalancing info.  Used only by bond_rebalance(). */
@@ -634,6 +635,35 @@  bond_slave_set_may_enable(struct bond *bond, void *slave_, bool may_enable)
     ovs_rwlock_unlock(&rwlock);
 }
 
+void
+bond_slave_set_carrier(struct bond *bond, void *slave_, bool carrier_up)
+{
+    struct bond_slave *slave = NULL;
+
+    ovs_rwlock_wrlock(&rwlock);
+    slave = bond_slave_lookup(bond, slave_);
+    if (slave) {
+        slave->carrier_up = carrier_up;
+    }
+    ovs_rwlock_unlock(&rwlock);
+}
+
+bool
+bond_slave_get_carrier(const struct bond *bond, void *slave_)
+{
+    bool carrier_up = false;
+    struct bond_slave *slave;
+
+    ovs_rwlock_rdlock(&rwlock);
+    slave = bond_slave_lookup((struct bond *)bond, slave_);
+    if (slave) {
+        carrier_up = slave->carrier_up;
+    }
+    ovs_rwlock_unlock(&rwlock);
+
+    return carrier_up;
+}
+
 /* Performs periodic maintenance on 'bond'.
  *
  * Returns true if the caller should revalidate its flows.
diff --git a/ofproto/bond.h b/ofproto/bond.h
index e7c3d9b..743e2b2 100644
--- a/ofproto/bond.h
+++ b/ofproto/bond.h
@@ -79,6 +79,9 @@  void bond_wait(struct bond *);
 
 void bond_slave_set_may_enable(struct bond *, void *slave_, bool may_enable);
 
+void bond_slave_set_carrier(struct bond *bond, void *slave_, bool carrier_up);
+bool bond_slave_get_carrier(const struct bond *bond, void *slave_);
+
 /* Special MAC learning support for SLB bonding. */
 bool bond_should_send_learning_packets(struct bond *);
 struct dp_packet *bond_compose_learning_packet(struct bond *,
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 50c959b..a589392 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -137,6 +137,7 @@  struct ofport_dpif {
     struct cfm *cfm;            /* Connectivity Fault Management, if any. */
     struct bfd *bfd;            /* BFD, if any. */
     struct lldp *lldp;          /* lldp, if any. */
+    bool carrier_up;            /* Carrier state from NIC */
     bool is_tunnel;             /* This port is a tunnel. */
     long long int carrier_seq;  /* Carrier status changes. */
     struct ofport_dpif *peer;   /* Peer if patch port. */
@@ -3324,12 +3325,13 @@  static void
 bundle_run(struct ofbundle *bundle)
 {
     if (bundle->lacp) {
-        lacp_run(bundle->lacp, send_pdu_cb);
+        lacp_run(bundle->lacp, bundle->bond, send_pdu_cb);
     }
     if (bundle->bond) {
         struct ofport_dpif *port;
 
         LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
+            bond_slave_set_carrier(bundle->bond, port, port->carrier_up);
             bond_slave_set_may_enable(bundle->bond, port, port->up.may_enable);
         }
 
@@ -3347,7 +3349,7 @@  static void
 bundle_wait(struct ofbundle *bundle)
 {
     if (bundle->lacp) {
-        lacp_wait(bundle->lacp);
+        lacp_wait(bundle->lacp, bundle->bond);
     }
     if (bundle->bond) {
         bond_wait(bundle->bond);
@@ -3563,8 +3565,10 @@  ofport_update_peer(struct ofport_dpif *ofport)
 static bool
 may_enable_port(struct ofport_dpif *ofport)
 {
+    ofport->carrier_up = netdev_get_carrier(ofport->up.netdev);
+
     /* Carrier must be up. */
-    if (!netdev_get_carrier(ofport->up.netdev)) {
+    if (!ofport->carrier_up) {
         return false;
     }
 
diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
index ded2ef0..12d0349 100644
--- a/tests/ofproto-dpif.at
+++ b/tests/ofproto-dpif.at
@@ -6581,6 +6581,9 @@  OVS_VSWITCHD_START([dnl
                     other_config:lacp-port-priority=222                 \
                     other_config:lacp-aggregation-key=3333 ])
 
+ovs-appctl netdev-dummy/set-admin-state p1 up
+ovs-appctl netdev-dummy/set-admin-state p2 up
+
 on_exit 'kill `cat test-sflow.pid`'
 AT_CHECK([ovstest test-sflow --log-file --detach --no-chdir --pidfile 0:127.0.0.1 > sflow.log], [0], [], [ignore])
 AT_CAPTURE_FILE([sflow.log])