@@ -5804,6 +5804,7 @@ dpif_netdev_wait(struct dpif *dpif)
ovs_mutex_unlock(&dp->port_mutex);
ovs_mutex_unlock(&dp_netdev_mutex);
seq_wait(tnl_conf_seq, dp->last_tnl_conf_seq);
+ tnl_neigh_cache_wait();
}
static void
@@ -22,6 +22,8 @@
#include <sys/types.h>
#include <netinet/in.h>
#include <netinet/icmp6.h>
+#include <linux/rtnetlink.h>
+#include <linux/if_ether.h>
#include <stdlib.h>
#include "bitmap.h"
@@ -35,6 +37,7 @@
#include "ovs-thread.h"
#include "packets.h"
#include "openvswitch/poll-loop.h"
+#include "openvswitch/ofpbuf.h"
#include "seq.h"
#include "socket-util.h"
#include "timeval.h"
@@ -42,10 +45,16 @@
#include "unixctl.h"
#include "util.h"
#include "openvswitch/vlog.h"
+#include "netlink-notifier.h"
+#include "netlink-socket.h"
+#include "netlink.h"
+#include "smap.h"
+VLOG_DEFINE_THIS_MODULE(tnl_neigh_cache);
/* In seconds */
#define NEIGH_ENTRY_DEFAULT_IDLE_TIME (15 * 60)
+#define NUD_VALID (NUD_PERMANENT|NUD_REACHABLE|NUD_PROBE|NUD_STALE|NUD_DELAY)
struct tnl_neigh_entry {
struct cmap_node cmap_node;
@@ -53,10 +62,30 @@ struct tnl_neigh_entry {
struct eth_addr mac;
time_t expires; /* Expiration time. */
char br_name[IFNAMSIZ];
+ bool event;
};
+/* Cache operation requested by a parsed neighbor netlink message. */
+enum tnl_neigh_nlmsg_op {
+ TNL_NEIGH_NLMSG_ADD = 1, /* Chosen for usable RTM_NEWNEIGH messages. */
+ TNL_NEIGH_NLMSG_DEL, /* Chosen for RTM_DELNEIGH messages. */
+};
+
+/* Neighbor change extracted from an rtnetlink message by
+ * tnl_neigh_event_parse() and consumed by tnl_neigh_event_change(). */
+struct tnl_neigh_nlmsg {
+ struct in6_addr ip; /* Neighbor address; IPv4 is stored IPv4-mapped. */
+ struct eth_addr mac; /* Link-layer address; filled in only for ADD. */
+ char br_name[IFNAMSIZ]; /* Interface name resolved from ndm_ifindex. */
+ enum tnl_neigh_nlmsg_op op; /* Operation to apply to the cache. */
+};
+
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
static struct cmap table = CMAP_INITIALIZER;
static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
+static struct nln_notifier *neigh_notifier = NULL;
+static struct nln *neigh_nln = NULL;
+static struct tnl_neigh_nlmsg tnmsg;
+
+static int tnl_neigh_event_parse(struct ofpbuf *, struct tnl_neigh_nlmsg *);
+static void tnl_neigh_event_change(const struct tnl_neigh_nlmsg *, void *);
static uint32_t
tnl_neigh_hash(const struct in6_addr *ip)
@@ -72,7 +101,8 @@ tnl_neigh_lookup__(const char br_name[IFNAMSIZ], const struct in6_addr *dst)
hash = tnl_neigh_hash(dst);
CMAP_FOR_EACH_WITH_HASH (neigh, cmap_node, hash, &table) {
- if (ipv6_addr_equals(&neigh->ip, dst) && !strcmp(neigh->br_name, br_name)) {
+ if (ipv6_addr_equals(&neigh->ip, dst) &&
+ !strcmp(neigh->br_name, br_name) && !neigh->event) {
if (neigh->expires <= time_now()) {
return NULL;
}
@@ -81,6 +111,15 @@ tnl_neigh_lookup__(const char br_name[IFNAMSIZ], const struct in6_addr *dst)
return neigh;
}
}
+
+ /* Fall back to an entry for 'dst' that was learned from the system. */
+ CMAP_FOR_EACH_WITH_HASH (neigh, cmap_node, hash, &table) {
+ if (ipv6_addr_equals(&neigh->ip, dst) &&
+ neigh->event) {
+ return neigh;
+ }
+ }
+
return NULL;
}
@@ -114,15 +153,13 @@ tnl_neigh_delete(struct tnl_neigh_entry *neigh)
}
static void
-tnl_neigh_set__(const char name[IFNAMSIZ], const struct in6_addr *dst,
- const struct eth_addr mac)
+tnl_neigh_set_nolock(const char name[IFNAMSIZ], const struct in6_addr *dst,
+ const struct eth_addr mac, bool event)
{
- ovs_mutex_lock(&mutex);
struct tnl_neigh_entry *neigh = tnl_neigh_lookup__(name, dst);
if (neigh) {
if (eth_addr_equals(neigh->mac, mac)) {
neigh->expires = time_now() + NEIGH_ENTRY_DEFAULT_IDLE_TIME;
- ovs_mutex_unlock(&mutex);
return;
}
tnl_neigh_delete(neigh);
@@ -130,12 +167,39 @@ tnl_neigh_set__(const char name[IFNAMSIZ], const struct in6_addr *dst,
seq_change(tnl_conf_seq);
neigh = xmalloc(sizeof *neigh);
-
neigh->ip = *dst;
neigh->mac = mac;
+ neigh->event = event;
neigh->expires = time_now() + NEIGH_ENTRY_DEFAULT_IDLE_TIME;
ovs_strlcpy(neigh->br_name, name, sizeof neigh->br_name);
cmap_insert(&table, &neigh->cmap_node, tnl_neigh_hash(&neigh->ip));
+}
+
+/* Deletes every system-learned ('event') entry for address 'dst' whose
+ * br_name equals 'name', and bumps 'tnl_conf_seq' if anything was removed.
+ *
+ * This function does not take 'mutex' itself. */
+static void
+tnl_neigh_unset_nolock(const char name[IFNAMSIZ], const struct in6_addr *dst)
+{
+    struct tnl_neigh_entry *entry;
+    unsigned int n_removed = 0;
+
+    CMAP_FOR_EACH (entry, cmap_node, &table) {
+        /* Skip anything that is not a system-learned entry for this
+         * interface and address. */
+        if (!entry->event
+            || !ipv6_addr_equals(&entry->ip, dst)
+            || strcmp(entry->br_name, name)) {
+            continue;
+        }
+
+        tnl_neigh_delete(entry);
+        n_removed++;
+    }
+
+    if (n_removed) {
+        seq_change(tnl_conf_seq);
+    }
+}
+
+/* Locked wrapper around tnl_neigh_set_nolock(): records 'mac' for 'dst' on
+ * 'name' as a non-event entry while holding 'mutex'. */
+static void
+tnl_neigh_set__(const char name[IFNAMSIZ], const struct in6_addr *dst,
+ const struct eth_addr mac)
+{
+ ovs_mutex_lock(&mutex);
+ tnl_neigh_set_nolock(name, dst, mac, false);
ovs_mutex_unlock(&mutex);
}
@@ -208,11 +272,16 @@ tnl_neigh_cache_run(void)
ovs_mutex_lock(&mutex);
CMAP_FOR_EACH(neigh, cmap_node, &table) {
- if (neigh->expires <= time_now()) {
+ if (!neigh->event && neigh->expires <= time_now()) {
tnl_neigh_delete(neigh);
changed = true;
}
}
+
+ if (neigh_nln) {
+ nln_run(neigh_nln);
+ }
+
ovs_mutex_unlock(&mutex);
if (changed) {
@@ -220,6 +289,16 @@ tnl_neigh_cache_run(void)
}
}
+/* Calls nln_wait() on the neighbor netlink notifier, if one has been
+ * created.  Does nothing when 'neigh_nln' is unset. */
+void
+tnl_neigh_cache_wait(void)
+{
+    ovs_mutex_lock(&mutex);
+    if (neigh_nln != NULL) {
+        nln_wait(neigh_nln);
+    }
+    ovs_mutex_unlock(&mutex);
+}
+
void
tnl_neigh_flush(const char br_name[IFNAMSIZ])
{
@@ -241,21 +320,29 @@ tnl_neigh_flush(const char br_name[IFNAMSIZ])
}
+/* Removes cache entries while holding 'mutex'.  If 'event' is true only
+ * entries whose 'event' flag is set are removed; if it is false every entry
+ * is removed.  'tnl_conf_seq' is changed when at least one entry was
+ * deleted. */
static void
-tnl_neigh_cache_flush(struct unixctl_conn *conn, int argc OVS_UNUSED,
- const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
+tnl_neigh_flush__(bool event)
{
struct tnl_neigh_entry *neigh;
bool changed = false;
ovs_mutex_lock(&mutex);
- CMAP_FOR_EACH(neigh, cmap_node, &table) {
- tnl_neigh_delete(neigh);
- changed = true;
+ CMAP_FOR_EACH (neigh, cmap_node, &table) {
+ if (!event || neigh->event) {
+ tnl_neigh_delete(neigh);
+ changed = true;
+ }
}
ovs_mutex_unlock(&mutex);
if (changed) {
seq_change(tnl_conf_seq);
}
+}
+
+/* Command callback: flushes every cache entry (event and non-event alike)
+ * and replies "OK" on 'conn'.  The remaining arguments are unused. */
+static void
+tnl_neigh_cache_flush(struct unixctl_conn *conn, int argc OVS_UNUSED,
+ const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
+{
+ tnl_neigh_flush__(false);
unixctl_command_reply(conn, "OK");
}
@@ -319,7 +406,7 @@ tnl_neigh_cache_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
ds_put_format(&ds, ETH_ADDR_FMT" %s",
ETH_ADDR_ARGS(neigh->mac), neigh->br_name);
- if (neigh->expires <= time_now()) {
+ if (!neigh->event && neigh->expires <= time_now()) {
ds_put_format(&ds, " STALE");
}
ds_put_char(&ds, '\n');
@@ -330,6 +417,205 @@ tnl_neigh_cache_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
ds_destroy(&ds);
}
+/* Parses the rtnetlink neighbor message in 'buf' into 'change'.
+ *
+ * Only RTM_NEWNEIGH and RTM_DELNEIGH messages for AF_INET or AF_INET6
+ * neighbors are accepted.  An RTM_NEWNEIGH message is accepted only if its
+ * state is one of NUD_VALID and it carries a link-layer address; it yields
+ * TNL_NEIGH_NLMSG_ADD.  An RTM_DELNEIGH message yields TNL_NEIGH_NLMSG_DEL.
+ * IPv4 addresses are stored in 'change->ip' in IPv4-mapped form.
+ *
+ * Returns RTNLGRP_NEIGH if 'change' was filled in, otherwise 0 (message
+ * ignored or malformed). */
+static int
+tnl_neigh_event_parse(struct ofpbuf *buf, struct tnl_neigh_nlmsg *change)
+{
+    static const struct nl_policy policy[] = {
+        [NDA_DST] = { .type = NL_A_UNSPEC,
+                      .min_len = sizeof(struct in_addr),
+                      .optional = false, },
+        [NDA_LLADDR] = { .type = NL_A_UNSPEC,
+                         .min_len = ETH_ALEN,
+                         .optional = true, },
+    };
+
+    struct nlattr *attrs[ARRAY_SIZE(policy)];
+    const struct nlmsghdr *nlmsg;
+    const struct ndmsg *ndm;
+    char namebuf[IFNAMSIZ];
+    struct in6_addr addr;
+
+    /* Both headers must be fully contained in 'buf' before they are read. */
+    nlmsg = ofpbuf_at(buf, 0, NLMSG_HDRLEN);
+    if (!nlmsg) {
+        VLOG_DBG_RL(&rl, "The tnl neigh event parse failed");
+        return 0;
+    }
+
+    /* Process RTM_NEWNEIGH or RTM_DELNEIGH events only. */
+    if (nlmsg->nlmsg_type != RTM_NEWNEIGH &&
+        nlmsg->nlmsg_type != RTM_DELNEIGH) {
+        return 0;
+    }
+
+    ndm = ofpbuf_at(buf, NLMSG_HDRLEN, sizeof *ndm);
+    if (!ndm) {
+        VLOG_DBG_RL(&rl, "The tnl neigh event parse failed");
+        return 0;
+    }
+
+    if (ndm->ndm_family != AF_INET &&
+        ndm->ndm_family != AF_INET6) {
+        return 0;
+    }
+
+    /* The attributes follow the neighbor header (struct ndmsg). */
+    if (!nl_policy_parse(buf, NLMSG_HDRLEN + sizeof *ndm,
+                         policy, attrs, ARRAY_SIZE(policy))) {
+        VLOG_DBG_RL(&rl, "The tnl neigh event parse failed");
+        return 0;
+    }
+
+    if (!if_indextoname(ndm->ndm_ifindex, namebuf)) {
+        return 0;
+    }
+
+    if (ndm->ndm_family == AF_INET) {
+        const ovs_be32 *ip4;
+
+        ip4 = nl_attr_get_unspec(attrs[NDA_DST], sizeof *ip4);
+        addr = in6_addr_mapped_ipv4(*ip4);
+    } else {
+        const struct in6_addr *ip6;
+
+        /* The policy only guarantees an IPv4-sized payload, so verify that
+         * a full IPv6 address is present before reading it. */
+        if (nl_attr_get_size(attrs[NDA_DST]) < sizeof *ip6) {
+            VLOG_DBG_RL(&rl, "The tnl neigh event parse failed");
+            return 0;
+        }
+        ip6 = nl_attr_get_unspec(attrs[NDA_DST], sizeof *ip6);
+        addr = *ip6;
+    }
+
+    memset(change, 0, sizeof *change);
+    ovs_strlcpy(change->br_name, namebuf, sizeof change->br_name);
+    change->ip = addr;
+    change->op = TNL_NEIGH_NLMSG_DEL;
+
+    if (nlmsg->nlmsg_type == RTM_NEWNEIGH) {
+        const struct eth_addr *mac;
+
+        /* If neigh entry was not ready, will not cache it. */
+        if (!(ndm->ndm_state & NUD_VALID) || !attrs[NDA_LLADDR]) {
+            return 0;
+        }
+
+        mac = nl_attr_get_unspec(attrs[NDA_LLADDR], ETH_ALEN);
+        change->mac = *mac;
+        change->op = TNL_NEIGH_NLMSG_ADD;
+    }
+
+    return RTNLGRP_NEIGH;
+}
+
+/* Applies the parsed neighbor 'change' to the cache: an ADD stores the
+ * entry as an event entry, a DEL removes the matching event entries.  A
+ * null 'change' is ignored and 'aux' is unused.
+ *
+ * NOTE(review): this uses the *_nolock helpers, so the caller presumably
+ * has to hold 'mutex' -- confirm against all call sites. */
+static void
+tnl_neigh_event_change(const struct tnl_neigh_nlmsg *change,
+                       void *aux OVS_UNUSED)
+{
+    if (!change) {
+        return;
+    }
+
+    if (change->op == TNL_NEIGH_NLMSG_ADD) {
+        VLOG_DBG("Add neigh entry: %s "ETH_ADDR_FMT,
+                 change->br_name, ETH_ADDR_ARGS(change->mac));
+        tnl_neigh_set_nolock(change->br_name, &change->ip,
+                             change->mac, true);
+    } else if (change->op == TNL_NEIGH_NLMSG_DEL) {
+        char ip[INET6_ADDRSTRLEN];
+
+        ipv6_string_mapped(ip, &change->ip);
+        VLOG_DBG("Del neigh entry: %s %s", change->br_name, ip);
+        tnl_neigh_unset_nolock(change->br_name, &change->ip);
+    } else {
+        VLOG_ERR_RL(&rl, "The message ops of neigh netlink is unknown");
+    }
+}
+
+/* Tears down the neighbor netlink machinery: destroys the notifier first,
+ * then the nln handle, and resets both globals to NULL.  Either may already
+ * be unset. */
+static void
+tnl_neigh_event_uninit(void)
+{
+    if (neigh_notifier != NULL) {
+        nln_notifier_destroy(neigh_notifier);
+        neigh_notifier = NULL;
+    }
+
+    if (neigh_nln != NULL) {
+        nln_destroy(neigh_nln);
+        neigh_nln = NULL;
+    }
+}
+
+/* Creates the NETLINK_ROUTE nln handle (parsing into 'tnmsg') and registers
+ * tnl_neigh_event_change() as notifier for RTNLGRP_NEIGH.
+ *
+ * Returns 0 on success.  Returns -1 if either object could not be created;
+ * when the notifier fails, the nln handle is destroyed again. */
+static int
+tnl_neigh_event_init(void)
+{
+    neigh_nln = nln_create(NETLINK_ROUTE,
+                           (nln_parse_func *) tnl_neigh_event_parse,
+                           &tnmsg);
+    if (neigh_nln) {
+        neigh_notifier =
+            nln_notifier_create(neigh_nln, RTNLGRP_NEIGH,
+                                (nln_notify_func *) tnl_neigh_event_change,
+                                NULL);
+        if (neigh_notifier) {
+            return 0;
+        }
+
+        /* Do not keep a handle around without its notifier. */
+        tnl_neigh_event_uninit();
+    }
+
+    return -1;
+}
+
+/* Requests a dump of all neighbor entries (RTM_GETNEIGH, AF_UNSPEC) from
+ * the kernel and feeds every message that tnl_neigh_event_parse() accepts
+ * into the cache via tnl_neigh_event_change().
+ *
+ * Returns the result of nl_dump_done(). */
+static int
+tnl_neigh_event_dump(void)
+{
+    uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
+    struct ofpbuf request, reply, buf;
+    struct nl_dump dump;
+    struct ndmsg *ndmsg;
+
+    ofpbuf_init(&request, 0);
+    nl_msg_put_nlmsghdr(&request, sizeof *ndmsg, RTM_GETNEIGH,
+                        NLM_F_REQUEST | NLM_F_DUMP);
+
+    ndmsg = ofpbuf_put_zeros(&request, sizeof *ndmsg);
+    ndmsg->ndm_family = AF_UNSPEC;
+
+    nl_dump_start(&dump, NETLINK_ROUTE, &request);
+    ofpbuf_uninit(&request);
+
+    ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
+    while (nl_dump_next(&dump, &reply, &buf)) {
+        struct tnl_neigh_nlmsg msg;
+
+        if (tnl_neigh_event_parse(&reply, &msg)) {
+            /* tnl_neigh_event_change() uses the *_nolock helpers, which
+             * modify the cache, so hold 'mutex' around it just as
+             * tnl_neigh_cache_run() does around nln_run().  The lock is
+             * taken per message so it is not held across netlink I/O. */
+            ovs_mutex_lock(&mutex);
+            tnl_neigh_event_change(&msg, NULL);
+            ovs_mutex_unlock(&mutex);
+        }
+    }
+    ofpbuf_uninit(&buf);
+    return nl_dump_done(&dump);
+}
+
+/* Enables or disables learning neighbor entries from the system according
+ * to the boolean "tnl-neigh-event-enabled" key in 'ovs_other_config'
+ * (default false).
+ *
+ * Enabling creates the netlink handle and notifier and loads the current
+ * neighbor table with a dump; it is a no-op if already enabled.  Disabling
+ * flushes all event entries and destroys the netlink objects; it is a no-op
+ * if already disabled. */
+void
+tnl_neigh_event_enabled(const struct smap *ovs_other_config)
+{
+    if (!smap_get_bool(ovs_other_config, "tnl-neigh-event-enabled", false)) {
+        if (!neigh_nln && !neigh_notifier) {
+            return;
+        }
+        tnl_neigh_flush__(true);
+        tnl_neigh_event_uninit();
+        return;
+    }
+
+    if (neigh_nln || neigh_notifier) {
+        return;
+    }
+
+    if (tnl_neigh_event_init()) {
+        VLOG_ERR("Can't create nln handle or notifier for neighboring "
+                 "subsystem");
+        return;
+    }
+
+    if (tnl_neigh_event_dump()) {
+        /* A failed dump may already have inserted some event entries.
+         * Event entries never expire, so without a notifier to delete them
+         * they would stay in the cache forever; drop them here. */
+        tnl_neigh_flush__(true);
+        tnl_neigh_event_uninit();
+        VLOG_ERR("Can't dump neigh entries");
+    }
+}
+
void
tnl_neigh_cache_init(void)
{
@@ -37,6 +37,8 @@ int tnl_neigh_lookup(const char dev_name[], const struct in6_addr *dst,
struct eth_addr *mac);
void tnl_neigh_cache_init(void);
void tnl_neigh_cache_run(void);
+void tnl_neigh_cache_wait(void);
void tnl_neigh_flush(const char dev_name[]);
+void tnl_neigh_event_enabled(const struct smap *ovs_other_config);
#endif
@@ -69,6 +69,7 @@
#include "util.h"
#include "unixctl.h"
#include "lib/vswitch-idl.h"
+#include "tnl-neigh-cache.h"
#include "xenserver.h"
#include "vlan-bitmap.h"
@@ -3292,6 +3293,7 @@ bridge_run(void)
netdev_set_flow_api_enabled(&cfg->other_config);
dpdk_init(&cfg->other_config);
userspace_tso_init(&cfg->other_config);
+ tnl_neigh_event_enabled(&cfg->other_config);
}
/* Initialize the ofproto library. This only needs to run once, but
@@ -222,6 +222,23 @@
</p>
</column>
+ <column name="other_config" key="tnl-neigh-event-enabled"
+ type='{"type": "boolean"}'>
+ <p>
+ Set this value to <code>true</code> to enable learning neighbor entries
+ from the system. The default value is <code>false</code>.
+ </p>
+ <p>
+ If enabled, Open vSwitch learns neighbor entries from the system, so the
+ tunnel IP address does not have to be configured on an Open vSwitch bridge.
+ When encapsulating tunnel packets (e.g. native_tunnel_output), Open vSwitch
+ tries to use the neighbor entry learned from the system. This is useful for
+ flow bifurcation, a mechanism that uses capable Ethernet hardware
+ to split traffic between Linux user space and kernel space. More details:
+ http://git.dpdk.org/next/dpdk-next-net/tree/doc/guides/howto/flow_bifurcation.rst
+ </p>
+ </column>
+
<column name="other_config" key="hw-offload"
type='{"type": "boolean"}'>
<p>