@@ -970,6 +970,28 @@ sorted_poll_thread_list(struct dp_netdev *dp,
*n = k;
}
+static void
+pmd_health_check(struct dp_netdev_pmd_thread *pmd OVS_UNUSED)
+{
+ /* Placeholder: no per-PMD checks are performed yet, so 'pmd' is unused.
+  * Called by get_datapath_health() for each core whose check is enabled. */
+}
+
+/* Runs pmd_health_check() on every PMD thread of 'dp' whose health check is
+ * currently enabled.  Cores with the check disabled, and cores for which no
+ * PMD thread can be obtained, are skipped. */
+static void
+get_datapath_health(struct dp_netdev *dp)
+{
+    for (unsigned int core_id = 0; core_id < KEEPALIVE_MAXCORES; core_id++) {
+        struct dp_netdev_pmd_thread *pmd;
+
+        /* Health checks are enabled only after a heartbeat failure, so the
+         * common case is that there is nothing to do for this core. */
+        if (OVS_LIKELY(!ka_is_pmdhealth_check_enabled(core_id))) {
+            continue;
+        }
+
+        /* NOTE(review): dp_netdev_get_pmd() is expected to return NULL when
+         * no PMD can be referenced on 'core_id', and otherwise to hand back
+         * a reference that the caller must drop -- confirm against its
+         * definition in this file. */
+        pmd = dp_netdev_get_pmd(dp, core_id);
+        if (!pmd) {
+            continue;
+        }
+
+        pmd_health_check(pmd);
+        dp_netdev_pmd_unref(pmd);
+    }
+}
+
static void *
ovs_keepalive(void *f_)
{
@@ -981,6 +1003,7 @@ ovs_keepalive(void *f_)
int n_pmds = cmap_count(&dp->poll_threads) - 1;
if (n_pmds > 0) {
dispatch_heartbeats();
+ get_datapath_health(dp);
get_ka_stats();
}
@@ -42,6 +42,8 @@ struct keepalive_shm *ka_shm = NULL;
static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
static struct smap *keepalive_stats OVS_GUARDED_BY(mutex);
+/* Health-check state of each core, indexed by core id.
+ * NOTE(review): unlike 'keepalive_stats' this is not marked
+ * OVS_GUARDED_BY(mutex); confirm which threads read and write it. */
+static enum pmdhealth_check ka_pmd_core_health[KEEPALIVE_MAXCORES];
+
/* Return the Keepalive shared memory block name. */
static inline const char *
get_ka_shm_blk(void)
@@ -135,6 +137,50 @@ ka_mark_pmd_thread_sleep(void)
}
+/* Resets the health-check state of every core to PMD_HC_DISABLE. */
void
+ka_init_pmd_health_check(void)
+{
+    unsigned int remaining = KEEPALIVE_MAXCORES;
+
+    /* Walk the table from the last slot down to slot 0. */
+    while (remaining-- > 0) {
+        ka_pmd_core_health[remaining] = PMD_HC_DISABLE;
+    }
+}
+
+/* Marks the health check for 'core_id' as enabled (PMD_HC_ENABLE).
+ *
+ * Does nothing when keepalive is disabled or when 'core_id' is not below
+ * KEEPALIVE_MAXCORES. */
+void
+ka_enable_pmd_health_check(unsigned core_id)
+{
+    /* 'core_id' indexes a fixed-size array, so reject out-of-range ids
+     * instead of writing past the end of the table. */
+    if (!is_ka_enabled() || core_id >= KEEPALIVE_MAXCORES) {
+        return;
+    }
+
+    ka_pmd_core_health[core_id] = PMD_HC_ENABLE;
+}
+
+/* Marks the health check for 'core_id' as disabled (PMD_HC_DISABLE).
+ *
+ * Does nothing when keepalive is disabled or when 'core_id' is not below
+ * KEEPALIVE_MAXCORES. */
+void
+ka_disable_pmd_health_check(unsigned core_id)
+{
+    /* 'core_id' indexes a fixed-size array, so reject out-of-range ids
+     * instead of writing past the end of the table. */
+    if (!is_ka_enabled() || core_id >= KEEPALIVE_MAXCORES) {
+        return;
+    }
+
+    ka_pmd_core_health[core_id] = PMD_HC_DISABLE;
+}
+
+/* Returns true if the health check for 'core_id' is in any state other than
+ * PMD_HC_DISABLE.  A 'core_id' that is not below KEEPALIVE_MAXCORES is
+ * reported as not enabled rather than read out of bounds. */
+bool
+ka_is_pmdhealth_check_enabled(unsigned core_id)
+{
+    return core_id < KEEPALIVE_MAXCORES
+           && ka_pmd_core_health[core_id] != PMD_HC_DISABLE;
+}
+
+/* Returns the current health-check state recorded for 'core_id'.  A
+ * 'core_id' that is not below KEEPALIVE_MAXCORES yields PMD_HC_DISABLE
+ * rather than an out-of-bounds read. */
+enum pmdhealth_check
+ka_get_pmd_health_check_state(unsigned core_id)
+{
+    if (core_id >= KEEPALIVE_MAXCORES) {
+        return PMD_HC_DISABLE;
+    }
+
+    return ka_pmd_core_health[core_id];
+}
+
+/* Records 'state' as the health-check state of 'core_id'.
+ *
+ * Does nothing when keepalive is disabled or when 'core_id' is not below
+ * KEEPALIVE_MAXCORES. */
+void
+ka_set_pmd_health_check_state(unsigned core_id, enum pmdhealth_check state)
+{
+    /* 'core_id' indexes a fixed-size array, so reject out-of-range ids
+     * instead of writing past the end of the table. */
+    if (!is_ka_enabled() || core_id >= KEEPALIVE_MAXCORES) {
+        return;
+    }
+
+    ka_pmd_core_health[core_id] = state;
+}
+
+void
ka_set_pmd_state_ts(unsigned core_id, enum keepalive_state state,
uint64_t last_alive)
{
@@ -423,6 +469,7 @@ ka_init(const struct smap *ovs_other_config)
if (!err) {
VLOG_INFO("OvS Keepalive - initialized.");
ka_init_status = ka_init_success;
+ ka_init_pmd_health_check();
}
} else {
VLOG_ERR("keepalive_shm_create() failed.");
@@ -53,6 +53,13 @@ enum keepalive_status {
ka_init_success
};
+enum pmdhealth_check {
+ PMD_HC_DISABLE, /* Check is off; the state ka_init_pmd_health_check() sets. */
+ PMD_HC_ENABLE, /* Check requested via ka_enable_pmd_health_check(). */
+ PMD_HC_PROGRESS, /* Presumably: check is running -- TODO confirm; unused here. */
+ PMD_HC_COMPLETE /* Presumably: check has finished -- TODO confirm; unused here. */
+};
+
void ka_init(const struct smap *);
struct keepalive_shm *get_ka_shm(void);
void ka_set_pmd_state_ts(unsigned, enum keepalive_state, uint64_t);
@@ -62,6 +69,13 @@ void ka_unregister_pmd_thread(unsigned);
void ka_mark_pmd_thread_alive(void);
void ka_mark_pmd_thread_sleep(void);
+/* Per-core PMD health-check state accessors; each 'unsigned' argument is the
+ * core id. */
+void ka_init_pmd_health_check(void);
+void ka_enable_pmd_health_check(unsigned);
+void ka_disable_pmd_health_check(unsigned);
+bool ka_is_pmdhealth_check_enabled(unsigned);
+enum pmdhealth_check ka_get_pmd_health_check_state(unsigned);
+void ka_set_pmd_health_check_state(unsigned, enum pmdhealth_check);
+
void ka_store_pmd_id(unsigned core);
uint32_t ka_get_tid(unsigned core);
bool is_ka_enabled(void);
This commit enables additional datapath health checks. The checks are enabled only on a PMD heartbeat failure: after three successive heartbeats are missed, additional health checks need to be performed on the respective PMD thread to confirm the failure. The datapath health is monitored periodically from the keepalive thread. Note that the PMD health checks are performed only on the PMD threads whose health check is enabled. Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy@intel.com> --- lib/dpif-netdev.c | 23 +++++++++++++++++++++++ lib/keepalive.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ lib/keepalive.h | 14 ++++++++++++++ 3 files changed, 84 insertions(+)