diff mbox

[ovs-dev,RFC,v2,16/19] keepalive: Check the PMD cycle stats as part of PMD health checks.

Message ID 1497286187-69287-17-git-send-email-bhanuprakash.bodireddy@intel.com
State Superseded
Headers show

Commit Message

Bodireddy, Bhanuprakash June 12, 2017, 4:49 p.m. UTC
This commit adds support for checking the PMD cycle stats. If the cycle
counters do not change over a period of time, this can be flagged as a
possible PMD stall.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy@intel.com>
---
 lib/dpif-netdev.c | 18 +++++++++++-------
 lib/dpif-netdev.h |  6 ++++++
 lib/keepalive.c   | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/keepalive.h   |  3 +++
 4 files changed, 72 insertions(+), 7 deletions(-)
diff mbox

Patch

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 06ca7fb..dd9d396 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -327,12 +327,6 @@  enum dp_stat_type {
     DP_N_STATS
 };
 
-enum pmd_cycles_counter_type {
-    PMD_CYCLES_POLLING,         /* Cycles spent polling NICs. */
-    PMD_CYCLES_PROCESSING,      /* Cycles spent processing packets */
-    PMD_N_CYCLES
-};
-
 #define XPS_TIMEOUT_MS 500LL
 
 /* Contained by struct dp_netdev_port's 'rxqs' member.  */
@@ -977,6 +971,8 @@  pmd_health_check(struct dp_netdev_pmd_thread *pmd)
     struct rxq_poll *poll;
     int port_link_status = 0;
     int port_stats = 0;
+    int pmd_polling = 0;
+    uint64_t cycles[PMD_N_CYCLES];
 
     struct svec pmd_poll_list;
     svec_init(&pmd_poll_list);
@@ -1011,6 +1007,13 @@  pmd_health_check(struct dp_netdev_pmd_thread *pmd)
     }
     svec_destroy(&pmd_poll_list);
 
+    /* Update the cycle counters in SHM. */
+    for (int idx = 0; idx < ARRAY_SIZE(cycles); idx++) {
+        atomic_read_relaxed(&pmd->cycles.n[idx], &cycles[idx]);
+    }
+
+    pmd_polling = ka_shm_update_pmd_cycles(pmd->core_id, cycles);
+
     port_link_status = ka_get_polled_ports_status(pmd->core_id);
     port_stats = ka_get_polled_ports_stats(pmd->core_id);
 
@@ -1024,7 +1027,8 @@  pmd_health_check(struct dp_netdev_pmd_thread *pmd)
         break;
     case PMD_HC_COMPLETE:
         if (port_link_status == ACTIVE_RUN_STATE &&
-               port_stats == ACTIVE_RUN_STATE ) {
+              port_stats == ACTIVE_RUN_STATE &&
+                pmd_polling == ACTIVE_RUN_STATE) {
             ka_set_pmd_state_ts(pmd->core_id, KA_STATE_ALIVE, 0);
         }
         break;
diff --git a/lib/dpif-netdev.h b/lib/dpif-netdev.h
index 6db6ed2..e7c2400 100644
--- a/lib/dpif-netdev.h
+++ b/lib/dpif-netdev.h
@@ -33,6 +33,12 @@  extern "C" {
  * headers to be aligned on a 4-byte boundary.  */
 enum { DP_NETDEV_HEADROOM = 2 + VLAN_HEADER_LEN };
 
+enum pmd_cycles_counter_type {
+    PMD_CYCLES_POLLING,         /* Cycles spent polling NICs. */
+    PMD_CYCLES_PROCESSING,      /* Cycles spent processing packets. */
+    PMD_N_CYCLES                /* Counter count; sizes 'cycles' arrays. */
+};
+
 bool dpif_is_netdev(const struct dpif *);
 
 #define NR_QUEUE   1
diff --git a/lib/keepalive.c b/lib/keepalive.c
index b702ebc..84813bf 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -527,6 +527,58 @@  ka_shm_update_port_statistics(const struct netdev *netdev,
                                                         state;
 }
 
+/* Records or checks the cycle counters of the PMD thread on 'core_id'.
+ *
+ * 'cycles' holds the PMD's current counters, indexed by
+ * enum pmd_cycles_counter_type.  When the core's health check state is
+ * PMD_HC_ENABLE the counters are saved in shared memory as a baseline; when
+ * it is PMD_HC_PROGRESS they are compared against that baseline.
+ *
+ * Returns FAILURE_STATE if neither counter changed since the baseline,
+ * ACTIVE_RUN_STATE otherwise, or -1 if shared memory is unavailable or all
+ * counters are still zero. */
+int
+ka_shm_update_pmd_cycles(int core_id, uint64_t cycles[PMD_N_CYCLES])
+{
+    struct keepalive_shm *ka_shm = get_ka_shm();
+    if (!ka_shm) {
+        VLOG_ERR_RL(&rl, "KeepAlive: Invalid shared memory block.");
+        return -1;
+    }
+
+    /* Bail out while every counter is still zero. */
+    uint64_t total_cycles = 0;
+    for (int i = 0; i < PMD_N_CYCLES; i++) {
+        total_cycles += cycles[i];
+    }
+    if (!total_cycles) {
+        return -1;
+    }
+
+    uint64_t *saved = ka_shm->ext_stats[core_id].cycles;
+    int pmd_hc_state = ka_get_pmd_health_check_state(core_id);
+
+    if (PMD_HC_ENABLE == pmd_hc_state) {
+        saved[PMD_CYCLES_POLLING] = cycles[PMD_CYCLES_POLLING];
+        saved[PMD_CYCLES_PROCESSING] = cycles[PMD_CYCLES_PROCESSING];
+    } else if (PMD_HC_PROGRESS == pmd_hc_state) {
+        bool polling_stalled =
+            cycles[PMD_CYCLES_POLLING] == saved[PMD_CYCLES_POLLING];
+        bool processing_stalled =
+            cycles[PMD_CYCLES_PROCESSING] == saved[PMD_CYCLES_PROCESSING];
+
+        VLOG_DBG_RL(&rl, "Keepalive: Going to check the PMD thresholds now.");
+
+        /* No cycles spent polling or processing since the baseline. */
+        if (polling_stalled && processing_stalled) {
+            VLOG_DBG_RL(&rl, "PMD FAILURE!");
+            return FAILURE_STATE;
+        }
+    }
+
+    return ACTIVE_RUN_STATE;
+}
+
 static void
 ka_unixctl_pmd_health_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
                        const char *argv[] OVS_UNUSED, void *ka_shm_)
diff --git a/lib/keepalive.h b/lib/keepalive.h
index 1f1f1c1..7501065 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -26,6 +26,7 @@ 
 #define KEEPALIVE_MAXCORES 128
 #endif /* DPDK_NETDEV */
 
+#include "dpif-netdev.h"
 #include "netdev.h"
 
 #define MAX_POLL_PORTS 20
@@ -61,6 +62,7 @@  struct pmd_extended_stats {
     char *health_status;
     int num_poll_ports;
     struct poll_port_stats port_stats[MAX_POLL_PORTS];
+    uint64_t cycles[PMD_N_CYCLES];
 };
 
 struct keepalive_shm {
@@ -122,5 +124,6 @@  void ka_shm_update_port_status(const char *,int,char *,int,int);
 enum pmdhealth_status ka_get_polled_ports_status(unsigned);
 void ka_shm_update_port_statistics(const struct netdev *,int,int);
 enum pmdhealth_status ka_get_polled_ports_stats(unsigned);
+int ka_shm_update_pmd_cycles(int, uint64_t cycles[PMD_N_CYCLES]);
 
 #endif /* keepalive.h */