[ovs-dev,v3,04/19] Keepalive: Add initial keepalive support.

Message ID 1501834086-31829-5-git-send-email-bhanuprakash.bodireddy@intel.com
State Superseded
Headers show

Commit Message

Bodireddy, Bhanuprakash Aug. 4, 2017, 8:07 a.m.
This commit introduces the initial keepalive support by adding
'keepalive' module and also helper and initialization functions
that will be invoked by later commits.

This commit adds a new ovsdb column "keepalive" that shows the status
of the datapath threads. This is implemented only for the DPDK datapath,
and only the status of PMD threads is reported.

For example:
  To enable the keepalive feature:
  'ovs-vsctl --no-wait set Open_vSwitch . other_config:enable-keepalive=true'

  To set a timer interval of 5000ms for monitoring packet processing cores:
  'ovs-vsctl --no-wait set Open_vSwitch . \
     other_config:keepalive-interval="5000"'

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy@intel.com>
---
 lib/automake.mk            |   2 +
 lib/dpdk-stub.c            |   6 ++
 lib/dpdk.c                 |  30 +++++++--
 lib/dpdk.h                 |   4 ++
 lib/keepalive.c            | 157 +++++++++++++++++++++++++++++++++++++++++++++
 lib/keepalive.h            |  74 +++++++++++++++++++++
 lib/netdev-dpdk.c          |  61 +++++++++++++++++-
 lib/netdev-dpdk.h          |   5 ++
 vswitchd/bridge.c          |   8 +++
 vswitchd/vswitch.ovsschema |   8 ++-
 vswitchd/vswitch.xml       |  49 ++++++++++++++
 11 files changed, 397 insertions(+), 7 deletions(-)
 create mode 100644 lib/keepalive.c
 create mode 100644 lib/keepalive.h

Patch

diff --git a/lib/automake.mk b/lib/automake.mk
index 2415f4c..0d99f0a 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -110,6 +110,8 @@  lib_libopenvswitch_la_SOURCES = \
 	lib/json.c \
 	lib/jsonrpc.c \
 	lib/jsonrpc.h \
+	lib/keepalive.c \
+	lib/keepalive.h \
 	lib/lacp.c \
 	lib/lacp.h \
 	lib/latch.h \
diff --git a/lib/dpdk-stub.c b/lib/dpdk-stub.c
index d7fb19b..b4f111a 100644
--- a/lib/dpdk-stub.c
+++ b/lib/dpdk-stub.c
@@ -49,6 +49,12 @@  dpdk_get_vhost_sock_dir(void)
     return NULL;
 }
 
+/* Stub variant (lib/dpdk-stub.c): always reports DPDK as not enabled. */
+bool
+dpdk_is_enabled(void)
+{
+    return false;
+}
+
 void
 dpdk_register_pmd_core(unsigned core_id OVS_UNUSED)
 {
diff --git a/lib/dpdk.c b/lib/dpdk.c
index 8db63bf..250cc2f 100644
--- a/lib/dpdk.c
+++ b/lib/dpdk.c
@@ -32,6 +32,7 @@ 
 
 #include "dirs.h"
 #include "fatal-signal.h"
+#include "keepalive.h"
 #include "netdev-dpdk.h"
 #include "openvswitch/dynamic-string.h"
 #include "openvswitch/vlog.h"
@@ -42,6 +43,7 @@  VLOG_DEFINE_THIS_MODULE(dpdk);
 static FILE *log_stream = NULL;       /* Stream for DPDK log redirection */
 
 static char *vhost_sock_dir = NULL;   /* Location of vhost-user sockets */
+static bool dpdk_enabled = false;     /* DPDK status. */
 
 static int
 process_vhost_flags(char *flag, const char *default_val, int size,
@@ -303,6 +305,12 @@  static cookie_io_functions_t dpdk_log_func = {
     .write = dpdk_log_write,
 };
 
+/* Returns the file-scope 'dpdk_enabled' flag, which dpdk_init() sets to true
+ * after dpdk_init__() has run. */
+bool
+dpdk_is_enabled(void)
+{
+    return dpdk_enabled;
+}
+
 static void
 dpdk_init__(const struct smap *ovs_other_config)
 {
@@ -456,9 +464,7 @@  dpdk_init__(const struct smap *ovs_other_config)
 void
 dpdk_init(const struct smap *ovs_other_config)
 {
-    static bool enabled = false;
-
-    if (enabled || !ovs_other_config) {
+    if (dpdk_enabled || !ovs_other_config) {
         return;
     }
 
@@ -468,7 +474,7 @@  dpdk_init(const struct smap *ovs_other_config)
         if (ovsthread_once_start(&once_enable)) {
             VLOG_INFO("DPDK Enabled - initializing...");
             dpdk_init__(ovs_other_config);
-            enabled = true;
+            dpdk_enabled = true;
             VLOG_INFO("DPDK Enabled - initialized");
             ovsthread_once_done(&once_enable);
         }
@@ -477,6 +483,22 @@  dpdk_init(const struct smap *ovs_other_config)
     }
 }
 
+/* Creates the DPDK keepalive object, storing it in
+ * 'rte_global_keepalive_info', with dpdk_failcore_cb() as the failure
+ * callback and dpdk_ka_update_core_state() as the relay callback.  'ka_info'
+ * is handed to both callbacks as their opaque data pointer.
+ *
+ * Returns 0 on success, -1 if the keepalive object could not be created. */
+int
+dpdk_ka_init(struct keepalive_info *ka_info)
+{
+    rte_global_keepalive_info = rte_keepalive_create(&dpdk_failcore_cb,
+                                                     ka_info);
+    if (rte_global_keepalive_info == NULL) {
+        VLOG_ERR("Keepalive initialization failed.");
+        return -1;
+    }
+
+    rte_keepalive_register_relay_callback(rte_global_keepalive_info,
+                                          dpdk_ka_update_core_state,
+                                          ka_info);
+    return 0;
+}
+
 const char *
 dpdk_get_vhost_sock_dir(void)
 {
diff --git a/lib/dpdk.h b/lib/dpdk.h
index 3f31211..7619730 100644
--- a/lib/dpdk.h
+++ b/lib/dpdk.h
@@ -17,6 +17,7 @@ 
 #ifndef DPDK_H
 #define DPDK_H
 
+#include "stdbool.h"
 #ifdef DPDK_NETDEV
 
 #include <rte_config.h>
@@ -31,9 +32,12 @@ 
 #endif /* DPDK_NETDEV */
 
 struct smap;
+struct keepalive_info;
 
 struct rte_keepalive *rte_global_keepalive_info;
+bool dpdk_is_enabled(void);
 void dpdk_init(const struct smap *ovs_other_config);
+int dpdk_ka_init(struct keepalive_info *ka_info);
 void dpdk_set_lcore_id(unsigned cpu);
 const char *dpdk_get_vhost_sock_dir(void);
 
diff --git a/lib/keepalive.c b/lib/keepalive.c
new file mode 100644
index 0000000..e93dc99
--- /dev/null
+++ b/lib/keepalive.c
@@ -0,0 +1,157 @@ 
+/*
+ * Copyright (c) 2014, 2015, 2016, 2017 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <unistd.h>
+
+#include "dpdk.h"
+#include "keepalive.h"
+#include "lib/vswitch-idl.h"
+#include "openvswitch/vlog.h"
+
+VLOG_DEFINE_THIS_MODULE(keepalive);
+
+static bool keepalive_enable = false;    /* Keepalive disabled by default */
+/* Outcome of ka_init().  Declared with the enum type whose constants
+ * (ka_init_failure / ka_init_success) it is initialized and assigned with,
+ * rather than 'bool'. */
+static enum keepalive_status ka_init_status = ka_init_failure;
+static uint32_t keepalive_timer_interval;     /* keepalive timer interval */
+static struct keepalive_info *ka_info = NULL; /* Allocated by ka_init__(). */
+
+/* Returns the current value of the 'keepalive_enable' flag, which ka_init()
+ * sets when "enable-keepalive" is true in the configuration. */
+inline bool
+ka_is_enabled(void)
+{
+    return keepalive_enable;
+}
+
+/* Returns the thread id stored for the datapath thread on core 'core_idx'.
+ *
+ * Returns -1 if keepalive is disabled, if the keepalive state has not been
+ * allocated yet, or if 'core_idx' falls outside the thread_id[] table
+ * (KA_DP_MAXCORES entries); the last check prevents an out-of-bounds read.
+ * When a slot is consulted, its tid is asserted to be positive. */
+inline int
+ka_get_pmd_tid(unsigned core_idx)
+{
+    if (!ka_is_enabled() || !ka_info || core_idx >= KA_DP_MAXCORES) {
+        return -1;
+    }
+
+    int tid = ka_info->thread_id[core_idx];
+    ovs_assert(tid > 0);
+    return tid;
+}
+
+/* Records 'state' and the 'last_alive' timestamp on every process_list entry
+ * that matches both 'core_id' and the thread id registered for that core.
+ *
+ * Does nothing when no keepalive state exists or no thread id is available
+ * for 'core_id' (ka_get_pmd_tid() returned a negative value); previously this
+ * path dereferenced a NULL 'ka_info'. */
+void
+ka_set_pmd_state_ts(unsigned core_id, enum keepalive_state state,
+                    uint64_t last_alive)
+{
+    struct ka_process_info *pinfo;
+    int tid = ka_get_pmd_tid(core_id);
+
+    if (!ka_info || tid < 0) {
+        return;
+    }
+
+    ovs_mutex_lock(&ka_info->proclist_mutex);
+    /* Entries are looked up by the hash of their tid. */
+    HMAP_FOR_EACH_WITH_HASH (pinfo, node, hash_int(tid, 0),
+                             &ka_info->process_list) {
+        if ((pinfo->core_id == core_id) && (pinfo->tid == tid)) {
+            pinfo->core_state = state;
+            pinfo->core_last_seen_times = last_alive;
+        }
+    }
+    ovs_mutex_unlock(&ka_info->proclist_mutex);
+}
+
+/* Returns the keepalive timer interval, in milliseconds, read from the
+ * "keepalive-interval" key of 'ovs_other_config'.
+ *
+ * Falls back to OVS_KEEPALIVE_TIMEOUT when the key is absent, and also when
+ * the configured value is zero or negative, since such a value would
+ * otherwise be converted to a meaningless unsigned interval. */
+static uint32_t
+get_ka_timer_interval(const struct smap *ovs_other_config)
+{
+#define OVS_KEEPALIVE_TIMEOUT 1000    /* Default timeout set to 1000ms */
+    int ka_interval = smap_get_int(ovs_other_config, "keepalive-interval",
+                                   OVS_KEEPALIVE_TIMEOUT);
+
+    if (ka_interval <= 0) {
+        VLOG_WARN("Invalid keepalive-interval %d, using default of %d (ms)",
+                  ka_interval, OVS_KEEPALIVE_TIMEOUT);
+        ka_interval = OVS_KEEPALIVE_TIMEOUT;
+    }
+
+    VLOG_INFO("Keepalive timer interval set to %d (ms)", ka_interval);
+    return ka_interval;
+}
+
+/* Returns a newly allocated, zero-filled 'struct keepalive_info'. */
+static struct keepalive_info *
+keepalive_info_create(void)
+{
+    return xzalloc(sizeof(struct keepalive_info));
+}
+
+/* Allocates the module-wide keepalive state, initializes its process list and
+ * mutex, and passes it to dpdk_ka_init().
+ *
+ * Returns 0 on success and -1 on failure.  Builds without DPDK_NETDEV always
+ * return -1. */
+static int
+ka_init__(void)
+{
+    int retval = -1;
+
+    ka_info = keepalive_info_create();
+    if (ka_info == NULL) {
+        VLOG_ERR("OvS Keepalive - initialization failed.");
+        return retval;
+    }
+
+    hmap_init(&ka_info->process_list);
+    ovs_mutex_init(&ka_info->proclist_mutex);
+
+#ifdef DPDK_NETDEV
+    retval = dpdk_ka_init(ka_info);
+#endif
+    return retval;
+}
+
+/* Initializes the keepalive framework from 'ovs_other_config'.
+ *
+ * The "enable-keepalive" key is evaluated only on the first call that gets
+ * past the guard below (it sits inside an ovsthread_once block); later calls
+ * do not re-read it.
+ *
+ * Keepalive is reported as enabled (see ka_is_enabled()) only after
+ * ka_init__() has succeeded.  Previously the flag was set before
+ * initialization and stayed set on failure, leaving a half-initialized
+ * subsystem marked as enabled. */
+void
+ka_init(const struct smap *ovs_other_config)
+{
+    static struct ovsthread_once once_enable = OVSTHREAD_ONCE_INITIALIZER;
+
+    if (ka_init_status || !ovs_other_config) {
+        return;
+    }
+
+    if (ovsthread_once_start(&once_enable)) {
+        if (smap_get_bool(ovs_other_config, "enable-keepalive", false)) {
+            keepalive_timer_interval =
+                get_ka_timer_interval(ovs_other_config);
+
+            if (!ka_init__()) {
+                keepalive_enable = true;
+                ka_init_status = ka_init_success;
+                VLOG_INFO("OvS Keepalive enabled.");
+                VLOG_INFO("OvS Keepalive - initialized.");
+            } else {
+                VLOG_ERR("OvS Keepalive - initialization failed, "
+                         "keepalive stays disabled.");
+            }
+        }
+
+        ovsthread_once_done(&once_enable);
+    }
+}
+
+/* Releases everything owned by the keepalive module: every entry of the
+ * process list, the list itself, its mutex and the 'ka_info' structure.
+ *
+ * Afterwards keepalive is marked disabled and 'ka_info' is reset to NULL, so
+ * that later ka_is_enabled() / ka_get_pmd_tid() callers cannot touch the
+ * freed structure.  Safe to call when keepalive was never initialized. */
+void
+ka_destroy(void)
+{
+    struct ka_process_info *pinfo;
+
+    if (!ka_info) {
+        return;
+    }
+
+    /* Stop advertising keepalive before the backing state goes away. */
+    keepalive_enable = false;
+
+    ovs_mutex_lock(&ka_info->proclist_mutex);
+    HMAP_FOR_EACH_POP (pinfo, node, &ka_info->process_list) {
+        free(pinfo);
+    }
+    ovs_mutex_unlock(&ka_info->proclist_mutex);
+    hmap_destroy(&ka_info->process_list);
+
+    ovs_mutex_destroy(&ka_info->proclist_mutex);
+
+    free(ka_info);
+    ka_info = NULL;
+}
diff --git a/lib/keepalive.h b/lib/keepalive.h
new file mode 100644
index 0000000..b87b66f
--- /dev/null
+++ b/lib/keepalive.h
@@ -0,0 +1,74 @@ 
+/*
+ * Copyright (c) 2016 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef KEEPALIVE_H
+#define KEEPALIVE_H
+
+#include <stdint.h>
+#include "openvswitch/hmap.h"
+#include "ovs-thread.h"
+#ifdef DPDK_NETDEV
+#include <rte_keepalive.h>
+#define KA_DP_MAXCORES RTE_KEEPALIVE_MAXCORES
+#else
+#define KA_DP_MAXCORES 128
+#endif /* DPDK_NETDEV */
+
+struct smap;
+
+/* State of a monitored datapath thread.
+ *
+ * NOTE(review): the numeric values are deliberately not in ascending source
+ * order.  dpdk_ka_update_core_state() passes an 'enum rte_keepalive_state'
+ * value straight through as an 'enum keepalive_state', so the values
+ * presumably have to stay in step with DPDK's enum -- confirm against
+ * rte_keepalive.h before renumbering. */
+enum keepalive_state {
+    KA_STATE_UNUSED = 0,
+    KA_STATE_ALIVE = 1,
+    KA_STATE_MISSING = 4,
+    KA_STATE_DEAD = 2,
+    KA_STATE_GONE = 3,
+    KA_STATE_DOZING = 5,
+    KA_STATE_SLEEP = 6,
+    KA_STATE_CHECK = 7
+};
+
+/* One monitored thread, stored in 'struct keepalive_info'.process_list. */
+struct ka_process_info {
+    int tid;                          /* Thread id; entries are looked up by
+                                       * hash_int(tid, 0). */
+    int core_id;                      /* Core the thread is associated with. */
+    enum keepalive_state core_state;  /* State last recorded by
+                                       * ka_set_pmd_state_ts(). */
+    uint64_t core_last_seen_times;    /* 'last_alive' value last recorded by
+                                       * ka_set_pmd_state_ts(). */
+    struct hmap_node node;            /* In keepalive_info's process_list. */
+};
+
+/* Module-wide keepalive state.  A single instance is allocated by
+ * keepalive.c and handed to the DPDK keepalive callbacks as their data
+ * pointer. */
+struct keepalive_info {
+    /* Mutex for 'process_list'. */
+    struct ovs_mutex proclist_mutex;
+
+    /* List of process/threads monitored by KA framework. */
+    struct hmap process_list OVS_GUARDED;
+
+    /* Store Datapath threads 'tid'.
+     * In case of DPDK there can be max of KA_DP_MAXCORES threads.
+     * Indexed by core id (see ka_get_pmd_tid()). */
+    pid_t thread_id[KA_DP_MAXCORES];
+};
+
+/* Result of keepalive initialization.  ka_init_failure is 0, so the status
+ * can be tested directly in a boolean context, as ka_init() does. */
+enum keepalive_status {
+    ka_init_failure = 0,
+    ka_init_success
+};
+
+/* ka_init() reads "enable-keepalive" and "keepalive-interval" from the given
+ * configuration map; ka_destroy() frees the keepalive state. */
+void ka_init(const struct smap *);
+void ka_destroy(void);
+/* Arguments: core id, new state, last-alive timestamp. */
+void ka_set_pmd_state_ts(unsigned, enum keepalive_state, uint64_t);
+
+/* Returns the thread id stored for 'core', or -1 if keepalive is disabled. */
+int ka_get_pmd_tid(unsigned core);
+bool ka_is_enabled(void);
+#endif /* keepalive.h */
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 1d82bca..5415544 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -31,6 +31,7 @@ 
 #include <rte_errno.h>
 #include <rte_eth_ring.h>
 #include <rte_ethdev.h>
+#include <rte_keepalive.h>
 #include <rte_malloc.h>
 #include <rte_mbuf.h>
 #include <rte_meter.h>
@@ -41,6 +42,7 @@ 
 #include "dpdk.h"
 #include "dpif-netdev.h"
 #include "fatal-signal.h"
+#include "keepalive.h"
 #include "netdev-provider.h"
 #include "netdev-vport.h"
 #include "odp-util.h"
@@ -51,8 +53,9 @@ 
 #include "ovs-numa.h"
 #include "ovs-thread.h"
 #include "ovs-rcu.h"
-#include "packets.h"
 #include "openvswitch/shash.h"
+#include "packets.h"
+#include "process.h"
 #include "smap.h"
 #include "sset.h"
 #include "unaligned.h"
@@ -588,6 +591,62 @@  dpdk_mp_put(struct dpdk_mp *dmp)
     ovs_mutex_unlock(&dpdk_mp_mutex);
 }
 
+/* Callback function invoked on heartbeat miss.  Looks up the process state of
+ * the PMD thread registered for 'core_id' to tell a genuine heartbeat miss
+ * from a false positive, and logs the message accordingly.  'ptr_data' is
+ * unused.
+ *
+ * Nothing is logged when no thread id is available for 'core_id' or when the
+ * thread's process information cannot be read.
+ */
+void
+dpdk_failcore_cb(void *ptr_data OVS_UNUSED, const int core_id)
+{
+    struct process_info pinfo;
+    int tid;
+
+    /* ka_get_pmd_tid() takes an unsigned index; reject negative ids rather
+     * than letting them wrap to a huge value. */
+    if (core_id < 0) {
+        return;
+    }
+
+    /* A negative tid means there is no thread to inspect (e.g. keepalive is
+     * disabled).  Keep it signed so that -1 is not mistaken for a real id. */
+    tid = ka_get_pmd_tid(core_id);
+    if (tid < 0) {
+        return;
+    }
+
+    if (!get_process_info(tid, &pinfo)) {
+        return;
+    }
+
+    switch (pinfo.state) {
+    case 'R':
+        VLOG_INFO_RL(&rl, "False positive, pmd tid[%d] alive", tid);
+        break;
+    case 'S':
+    case 't':
+    case 'Z':
+    case 'D':
+        VLOG_WARN_RL(&rl, "PMD tid[%d] on core[%d] is unresponsive",
+                     tid, core_id);
+        break;
+    default:
+        /* Other states exist (e.g. 'T', stopped).  A monitoring callback
+         * must not abort the daemon over one of them, so just report it. */
+        VLOG_WARN_RL(&rl, "PMD tid[%d] on core[%d] in unexpected state '%c'",
+                     tid, core_id, pinfo.state);
+        break;
+    }
+}
+
+/*
+ * Relay callback: translates the DPDK keepalive state reported for 'core_id'
+ * and stores it, with the last-seen timestamp, in the keepalive info
+ * structure via ka_set_pmd_state_ts().  'ptr_data' is unused.
+ *
+ * ALIVE and MISSING are both recorded as KA_STATE_ALIVE; DOZING, SLEEP, DEAD
+ * and GONE are recorded with their numeric value unchanged; UNUSED is
+ * recorded with a zero timestamp.  Any other value is ignored.
+ */
+void
+dpdk_ka_update_core_state(void *ptr_data OVS_UNUSED, const int core_id,
+       const enum rte_keepalive_state core_state, uint64_t last_alive)
+{
+    enum keepalive_state new_state;
+    uint64_t timestamp = last_alive;
+
+    switch (core_state) {
+    case RTE_KA_STATE_ALIVE:
+    case RTE_KA_STATE_MISSING:
+        new_state = KA_STATE_ALIVE;
+        break;
+    case RTE_KA_STATE_DOZING:
+    case RTE_KA_STATE_SLEEP:
+    case RTE_KA_STATE_DEAD:
+    case RTE_KA_STATE_GONE:
+        new_state = (enum keepalive_state) core_state;
+        break;
+    case RTE_KA_STATE_UNUSED:
+        new_state = KA_STATE_UNUSED;
+        timestamp = 0;
+        break;
+    default:
+        return;
+    }
+
+    ka_set_pmd_state_ts(core_id, new_state, timestamp);
+}
+
 /* Tries to allocate new mempool on requested_socket_id with
  * mbuf size corresponding to requested_mtu.
  * On success new configuration will be applied.
diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h
index b7d02a7..229e0d0 100644
--- a/lib/netdev-dpdk.h
+++ b/lib/netdev-dpdk.h
@@ -18,15 +18,20 @@ 
 #define NETDEV_DPDK_H
 
 #include <config.h>
+#include <stdint.h>
 
 #include "openvswitch/compiler.h"
 
 struct dp_packet;
+enum rte_keepalive_state;
 
 #ifdef DPDK_NETDEV
 
 void netdev_dpdk_register(void);
 void free_dpdk_buf(struct dp_packet *);
+void dpdk_failcore_cb(void *, const int);
+void dpdk_ka_update_core_state(void *ptr, const int,
+                               const enum rte_keepalive_state, uint64_t);
 
 #else
 
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index a8cbae7..8ff91df 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -34,6 +34,7 @@ 
 #include "hmapx.h"
 #include "if-notifier.h"
 #include "jsonrpc.h"
+#include "keepalive.h"
 #include "lacp.h"
 #include "mac-learning.h"
 #include "mcast-snooping.h"
@@ -506,6 +507,7 @@  bridge_exit(bool delete_datapath)
         bridge_destroy(br, delete_datapath);
     }
     ovsdb_idl_destroy(idl);
+    ka_destroy();
 }
 
 /* Looks at the list of managers in 'ovs_cfg' and extracts their remote IP
@@ -2955,6 +2957,12 @@  bridge_run(void)
         dpdk_init(&cfg->other_config);
     }
 
+    /* Keepalive is implemented only for DPDK datapath now.
+     * Initialize KA framework only if DPDK is enabled. */
+    if (dpdk_is_enabled()) {
+        ka_init(&cfg->other_config);
+    }
+
     /* Initialize the ofproto library.  This only needs to run once, but
      * it must be done after the configuration is set.  If the
      * initialization has already occurred, bridge_init_ofproto()
diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema
index 19b49da..9aaa9d3 100644
--- a/vswitchd/vswitch.ovsschema
+++ b/vswitchd/vswitch.ovsschema
@@ -1,6 +1,6 @@ 
 {"name": "Open_vSwitch",
- "version": "7.15.0",
- "cksum": "544856471 23228",
+ "version": "7.16.0",
+ "cksum": "2912349852 23382",
  "tables": {
    "Open_vSwitch": {
      "columns": {
@@ -28,6 +28,10 @@ 
        "statistics": {
          "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"},
          "ephemeral": true},
+       "keepalive": {
+         "type": {"key": "string", "value": "string", "min": 0,
+                  "max": "unlimited"},
+         "ephemeral": true},
        "ovs_version": {
          "type": {"key": {"type": "string"},
                   "min": 0, "max": 1}},
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 074535b..88faf50 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -568,6 +568,55 @@ 
           </p>
         </column>
       </group>
+
+      <group title="Keepalive">
+        <p>
+          The <code>keepalive</code> column contains key-value pairs that
+          report health of datapath threads in Open vSwitch.  These are updated
+          periodically (based on the keepalive-interval).
+        </p>
+
+        <column name="other_config" key="enable-keepalive"
+                type='{"type": "boolean"}'>
+          Keepalive is disabled by default to avoid overhead in the common
+          case when heartbeat monitoring is not useful.  Set this value to
+          <code>true</code> to enable keepalive <ref column="keepalive"/>
+          column or to <code>false</code> to explicitly disable it.
+        </column>
+
+        <column name="other_config" key="keepalive-interval"
+                type='{"type": "integer", "minInteger": 1}'>
+          <p>
+            Specifies the keepalive timer interval, in milliseconds.
+          </p>
+          <p>
+            If not specified, this will be set to 1000 milliseconds (default
+            value). Changing this value requires restarting the daemon.
+          </p>
+        </column>
+
+        <column name="keepalive" key="PMDID">
+          <p>
+            One such key-value pair, with <code>ID</code> replaced by the
+            PMD thread, will exist for each active PMD thread.  The value is a
+            comma-separated list of PMD thread status, core number and the
+            last seen timestamp of PMD thread. In respective order, these
+            values are:
+          </p>
+
+          <ol>
+            <li>Status of PMD thread.  Valid values include ALIVE, MISSING,
+            DEAD, GONE, DOZING, SLEEPING.</li>
+            <li>Core id of PMD thread.</li>
+            <li>Last seen timestamp of the PMD core.</li>
+          </ol>
+
+          <p>
+            This is only valid for the OvS-DPDK datapath; currently only the
+            status of PMD threads is reported.
+          </p>
+        </column>
+      </group>
     </group>
 
     <group title="Version Reporting">