[ovs-dev,v3,15/19] keepalive: Check the packet statistics as part of PMD health checks.

Submitted by Bhanuprakash Bodireddy on Aug. 4, 2017, 8:08 a.m.

Details

Message ID 1501834086-31829-16-git-send-email-bhanuprakash.bodireddy@intel.com
State New
Headers show

Commit Message

Bhanuprakash Bodireddy Aug. 4, 2017, 8:08 a.m.
This commit adds support for checking the packet statistics of the ports
polled by a PMD thread. If packets aren't processed due to a PMD thread
stall/deadlock, the statistics won't update, and this can be used by the
monitoring framework to confirm PMD failure.

This mechanism has a limitation when multiqueue (MQ) is enabled. In some
cases the queues of a DPDK port can be polled by different PMD threads. Even
if one PMD thread stalls, the port statistics will still be incremented by
another queue that is processed by a different PMD. In this case the function
can report the active state because of the packets processed by the other PMD.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy@intel.com>
---
 lib/dpif-netdev.c | 25 +++++++++++---
 lib/keepalive.c   | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/keepalive.h   |  5 +++
 3 files changed, 122 insertions(+), 5 deletions(-)

Patch hide | download patch | download mbox

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index a6edf4d..dca8e8e 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -984,8 +984,9 @@  sorted_poll_thread_list(struct dp_netdev *dp,
 static void
 pmd_health_check(struct dp_netdev_pmd_thread *pmd)
 {
-    int port_link_status = 0;
     struct rxq_poll *poll;
+    int port_link_status = 0;
+    int port_stats = 0;
 
     struct svec pmd_poll_list;
     svec_init(&pmd_poll_list);
@@ -1000,22 +1001,36 @@  pmd_health_check(struct dp_netdev_pmd_thread *pmd)
     int i = 0;
     SVEC_FOR_EACH (i, port_name, &pmd_poll_list) {
         struct netdev *dev = netdev_from_name(port_name);
+        VLOG_DBG("Keepalive: Checking port %s", port_name);
         if (dev) {
             char *link_state = netdev_get_carrier(dev) ? "up" : "down";
             ka_info_update_port_status(port_name, 0, link_state,
                                         pmd->core_id, i);
+            if (!strcmp(link_state, "up")) {
+                ka_info_update_port_statistics(dev, pmd->core_id, i);
+            }
             netdev_close(dev);
         }
     }
     svec_destroy(&pmd_poll_list);
 
-    port_link_status = ka_get_polled_ports_status(pmd->core_id);
-
     int pmd_hc_state = ka_get_pmd_health_check_state(pmd->core_id);
-    if (PMD_HC_COMPLETE == pmd_hc_state) {
-        if (port_link_status == ACTIVE_RUN_STATE) {
+    switch (pmd_hc_state) {
+    case PMD_HC_ENABLE:
+        ka_set_pmd_health_check_state(pmd->core_id, PMD_HC_PROGRESS);
+        break;
+    case PMD_HC_PROGRESS:
+        ka_set_pmd_health_check_state(pmd->core_id, PMD_HC_COMPLETE);
+        break;
+    case PMD_HC_COMPLETE:
+        port_link_status = ka_get_polled_ports_status(pmd->core_id);
+        port_stats = ka_get_polled_ports_stats(pmd->core_id);
+
+        if (port_link_status == ACTIVE_RUN_STATE &&
+               port_stats == ACTIVE_RUN_STATE ) {
             ka_set_pmd_state_ts(pmd->core_id, KA_STATE_ALIVE, 0);
         }
+        break;
     }
 }
 
diff --git a/lib/keepalive.c b/lib/keepalive.c
index c306839..7e56dd4 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -23,6 +23,7 @@ 
 #include "dpdk.h"
 #include "keepalive.h"
 #include "lib/vswitch-idl.h"
+#include "netdev-dpdk.h"
 #include "openvswitch/dynamic-string.h"
 #include "openvswitch/vlog.h"
 #include "ovs-thread.h"
@@ -30,6 +31,7 @@ 
 #include "unixctl.h"
 
 VLOG_DEFINE_THIS_MODULE(keepalive);
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
 
 static bool keepalive_enable = false;    /* Keepalive disabled by default */
 static bool ka_init_status = ka_init_failure; /* Keepalive initialization */
@@ -462,6 +464,31 @@  enum pmdhealth_status ka_get_polled_ports_status(unsigned core_id)
     }
 }
 
+/* Aggregates the packet-statistics check results recorded for the ports
+ * polled by the PMD on 'core_id'.  Returns FAILURE_STATE when keepalive
+ * info is unavailable or any recorded port is marked failed, and
+ * ACTIVE_RUN_STATE otherwise. */
+enum pmdhealth_status ka_get_polled_ports_stats(unsigned core_id)
+{
+    if (!ka_info) {
+        return FAILURE_STATE;
+    }
+
+    int port_cnt = ka_info->ext_stats[core_id].num_poll_ports;
+    int port_idx = 0;
+
+    /* Stop at the first port whose statistics check was marked failed. */
+    while (port_idx < port_cnt) {
+        if (ka_info->ext_stats[core_id].port_stats[port_idx]
+                .state[PORT_STATS_CHECK] == FAILURE_STATE) {
+            return FAILURE_STATE;
+        }
+        port_idx++;
+    }
+
+    return ACTIVE_RUN_STATE;
+}
+
 void
 ka_info_update_port_status(const char *port, int qid OVS_UNUSED,
                            char *link_state, int core_id, int idx)
@@ -489,6 +516,76 @@  ka_info_update_port_status(const char *port, int qid OVS_UNUSED,
                                                                state;
 }
 
+/* Records the packet-statistics health verdict for the port at 'idx' polled
+ * by the PMD on 'core_id'.  In PMD_HC_ENABLE a baseline of 'netdev' counters
+ * is saved; in PMD_HC_PROGRESS the counters are re-read and compared with the
+ * baseline.  In any other state the stored verdict is left untouched, so
+ * that the result computed in PMD_HC_PROGRESS is still there when it is read
+ * back in PMD_HC_COMPLETE. */
+void
+ka_info_update_port_statistics(const struct netdev *netdev,
+                              int core_id, int idx)
+{
+    int state = FAILURE_STATE;
+    int error;
+
+    if (!ka_info) {
+        VLOG_ERR_RL(&rl, "Keepalive disabled");
+        return;
+    }
+    /* NOTE(review): 'idx' is the caller's 0-based loop index; confirm that
+     * num_poll_ports is meant to hold the last index and not a count. */
+    ka_info->ext_stats[core_id].num_poll_ports = idx;
+
+    struct netdev_stats *base =
+        &ka_info->ext_stats[core_id].port_stats[idx].stats;
+    int pmd_hc_state = ka_get_pmd_health_check_state(core_id);
+
+    if (pmd_hc_state == PMD_HC_ENABLE) {
+        /* First pass: take the baseline.  A failed read is only logged. */
+        error = netdev_get_stats(netdev, base);
+        if (error) {
+            VLOG_ERR("\tCouldn't retrieve stats (%s)", ovs_strerror(error));
+        }
+        state = ACTIVE_RUN_STATE;
+    } else if (pmd_hc_state == PMD_HC_PROGRESS) {
+        struct netdev_stats now;
+
+        /* Second pass: a failed read leaves the verdict at FAILURE_STATE. */
+        error = netdev_get_stats(netdev, &now);
+        if (!error) {
+            /* Counters that are zero in both samples are logged as idle; if
+             * both directions are idle the port is considered healthy. */
+            bool tx_idle = !now.tx_packets && !base->tx_packets;
+            bool rx_idle = !now.rx_packets && !base->rx_packets;
+
+            if (tx_idle) {
+                VLOG_DBG_RL(&rl, "\tNo packets transmitted");
+            }
+            if (rx_idle) {
+                VLOG_DBG_RL(&rl, "\tNo packets received");
+            }
+
+            if (tx_idle && rx_idle) {
+                VLOG_DBG_RL(&rl, "\tNo active traffic");
+                state = ACTIVE_RUN_STATE;
+            } else if (now.tx_packets != base->tx_packets ||
+                       now.rx_packets != base->rx_packets) {
+                VLOG_DBG_RL(&rl, "\tStats updated");
+                state = ACTIVE_RUN_STATE;
+            } else {
+                VLOG_DBG("\tPMD failure");
+            }
+        }
+    } else {
+        /* Not sampling: keep the verdict from the last sampling pass. */
+        return;
+    }
+
+    ka_info->ext_stats[core_id].port_stats[idx].state[PORT_STATS_CHECK] =
+                                                        state;
+}
+
 static void
 ka_unixctl_pmd_health_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
                            const char *argv[] OVS_UNUSED, void *ka_info_)
diff --git a/lib/keepalive.h b/lib/keepalive.h
index ff4aa3c..37f1e83 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -27,6 +27,8 @@ 
 #define KA_DP_MAXCORES 128
 #endif /* DPDK_NETDEV */
 
+#include "netdev.h"
+
 struct smap;
 
 enum keepalive_state {
@@ -68,6 +70,7 @@  struct poll_port_stats {
     const char *port;
     char *link_state;
     int state[PORT_NUM_CHECKS];
+    struct netdev_stats stats;
 };
 
 struct pmd_extended_stats {
@@ -135,5 +138,7 @@  void ka_load_process_list(struct hmap **);
 void dispatch_heartbeats(void);
 void ka_info_update_port_status(const char *,int,char *,int,int);
 enum pmdhealth_status ka_get_polled_ports_status(unsigned);
+void ka_info_update_port_statistics(const struct netdev *,int,int);
+enum pmdhealth_status ka_get_polled_ports_stats(unsigned);
 
 #endif /* keepalive.h */