[ovs-dev,v3,16/19] keepalive: Check the PMD cycle stats as part of PMD health checks.

Submitted by Bhanuprakash Bodireddy on Aug. 4, 2017, 8:08 a.m.

Details

Message ID 1501834086-31829-17-git-send-email-bhanuprakash.bodireddy@intel.com
State New
Headers show

Commit Message

Bhanuprakash Bodireddy Aug. 4, 2017, 8:08 a.m.
This commit adds support for checking the PMD cycle stats. If the cycles
aren't changing for a duration of time, this can be flagged as a possible
PMD stall.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy@intel.com>
---
 lib/dpif-netdev.c | 17 +++++++++--------
 lib/dpif-netdev.h |  7 +++++++
 lib/keepalive.c   | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 lib/keepalive.h   |  3 +++
 4 files changed, 68 insertions(+), 8 deletions(-)

Patch hide | download patch | download mbox

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index dca8e8e..f54677a 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -335,13 +335,6 @@  enum dp_stat_type {
     DP_N_STATS
 };
 
-enum pmd_cycles_counter_type {
-    PMD_CYCLES_IDLE,            /* Cycles spent idle or unsuccessful polling */
-    PMD_CYCLES_PROCESSING,      /* Cycles spent successfully polling and
-                                 * processing polled packets */
-    PMD_N_CYCLES
-};
-
 #define XPS_TIMEOUT_MS 500LL
 
 /* Contained by struct dp_netdev_port's 'rxqs' member.  */
@@ -987,6 +980,8 @@  pmd_health_check(struct dp_netdev_pmd_thread *pmd)
     struct rxq_poll *poll;
     int port_link_status = 0;
     int port_stats = 0;
+    int pmd_polling = 0;
+    uint64_t cycles[PMD_N_CYCLES];
 
     struct svec pmd_poll_list;
     svec_init(&pmd_poll_list);
@@ -1014,6 +1009,11 @@  pmd_health_check(struct dp_netdev_pmd_thread *pmd)
     }
     svec_destroy(&pmd_poll_list);
 
+    for (int idx = 0; idx < ARRAY_SIZE(cycles); idx++) {
+        atomic_read_relaxed(&pmd->cycles.n[idx], &cycles[idx]);
+    }
+    pmd_polling = ka_info_update_pmd_cycles(pmd->core_id, cycles);
+
     int pmd_hc_state = ka_get_pmd_health_check_state(pmd->core_id);
     switch (pmd_hc_state) {
     case PMD_HC_ENABLE:
@@ -1027,7 +1027,8 @@  pmd_health_check(struct dp_netdev_pmd_thread *pmd)
         port_stats = ka_get_polled_ports_stats(pmd->core_id);
 
         if (port_link_status == ACTIVE_RUN_STATE &&
-               port_stats == ACTIVE_RUN_STATE ) {
+              port_stats == ACTIVE_RUN_STATE &&
+                pmd_polling == ACTIVE_RUN_STATE) {
             ka_set_pmd_state_ts(pmd->core_id, KA_STATE_ALIVE, 0);
         }
         break;
diff --git a/lib/dpif-netdev.h b/lib/dpif-netdev.h
index 6db6ed2..8ea782b 100644
--- a/lib/dpif-netdev.h
+++ b/lib/dpif-netdev.h
@@ -33,6 +33,13 @@  extern "C" {
  * headers to be aligned on a 4-byte boundary.  */
 enum { DP_NETDEV_HEADROOM = 2 + VLAN_HEADER_LEN };
 
+enum pmd_cycles_counter_type {
+    PMD_CYCLES_IDLE,            /* Cycles spent idle or unsuccessful polling. */
+    PMD_CYCLES_PROCESSING,      /* Cycles spent successfully polling and
+                                 * processing polled packets. */
+    PMD_N_CYCLES                /* Count of the types above; sizes arrays. */
+};
+
 bool dpif_is_netdev(const struct dpif *);
 
 #define NR_QUEUE   1
diff --git a/lib/keepalive.c b/lib/keepalive.c
index 7e56dd4..a6120e7 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -586,6 +586,55 @@  ka_info_update_port_statistics(const struct netdev *netdev,
                                                         state;
 }
 
+int
+ka_info_update_pmd_cycles(int core_id, uint64_t cycles[PMD_N_CYCLES])
+{
+    int pmd_state = ACTIVE_RUN_STATE;
+    if (!ka_info) {
+        return FAILURE_STATE;
+    }
+    /* Sum all counters; an all-zero sample makes the function return -1. */
+    uint64_t total_cycles = 0;
+    for (int i = 0; i < PMD_N_CYCLES; i++) {
+        if (cycles[i] > 0) {
+            total_cycles += cycles[i];
+        }
+    }
+
+    if (!total_cycles) {
+        return -1;  /* NOTE(review): bare -1, not a named state constant. */
+    }
+    /* In PMD_HC_ENABLE, store the current counters as the baseline. */
+    int pmd_hc_state = ka_get_pmd_health_check_state(core_id);
+    if (PMD_HC_ENABLE == pmd_hc_state) {
+        ka_info->ext_stats[core_id].cycles[PMD_CYCLES_IDLE] =
+                   cycles[PMD_CYCLES_IDLE];
+
+        ka_info->ext_stats[core_id].cycles[PMD_CYCLES_PROCESSING] =
+                   cycles[PMD_CYCLES_PROCESSING];
+    }
+    /* In PMD_HC_PROGRESS, compare the counters with the stored values. */
+    if (PMD_HC_PROGRESS == pmd_hc_state) {
+        uint64_t polling_cycles_cnt = 0, proc_cycles_cnt = 0;
+        uint64_t prev_poll_cycles =
+            ka_info->ext_stats[core_id].cycles[PMD_CYCLES_IDLE];
+        uint64_t prev_proc_cycles =
+            ka_info->ext_stats[core_id].cycles[PMD_CYCLES_PROCESSING];
+
+        polling_cycles_cnt = cycles[PMD_CYCLES_IDLE] - prev_poll_cycles;
+
+        proc_cycles_cnt = cycles[PMD_CYCLES_PROCESSING]
+                               - prev_proc_cycles;
+        /* Neither counter changed since it was stored: report failure. */
+        if (!polling_cycles_cnt && !proc_cycles_cnt) {
+            VLOG_DBG("PMD FAILURE!");
+            pmd_state = FAILURE_STATE;
+        }
+    }
+    /* ACTIVE_RUN_STATE unless the PROGRESS comparison found no change. */
+    return pmd_state;
+}
+
 static void
 ka_unixctl_pmd_health_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
                            const char *argv[] OVS_UNUSED, void *ka_info_)
diff --git a/lib/keepalive.h b/lib/keepalive.h
index 37f1e83..df8768c 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -27,6 +27,7 @@ 
 #define KA_DP_MAXCORES 128
 #endif /* DPDK_NETDEV */
 
+#include "dpif-netdev.h"
 #include "netdev.h"
 
 struct smap;
@@ -76,6 +77,7 @@  struct poll_port_stats {
 struct pmd_extended_stats {
     char *health_status;
     struct poll_port_stats *port_stats;
+    uint64_t cycles[PMD_N_CYCLES];  /* Saved in PMD_HC_ENABLE state. */
     int num_poll_ports;
 };
 
@@ -140,5 +142,6 @@  void ka_info_update_port_status(const char *,int,char *,int,int);
 enum pmdhealth_status ka_get_polled_ports_status(unsigned);
 void ka_info_update_port_statistics(const struct netdev *,int,int);
 enum pmdhealth_status ka_get_polled_ports_stats(unsigned);
+int ka_info_update_pmd_cycles(int, uint64_t cycles[PMD_N_CYCLES]);
 
 #endif /* keepalive.h */