diff mbox series

[ovs-dev,ovs,v1,3/4] tnl-neigh-cache: Allow openvswitch learning neigh entries.

Message ID 20201214022001.84273-4-xiangxia.m.yue@gmail.com
State Deferred
Headers show
Series Support Flow Bifurcation | expand

Commit Message

Tonghao Zhang Dec. 14, 2020, 2:20 a.m. UTC
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

In the flow bifurcation case, the system has only one IP
address (for example, an IPv4 address). We assign it to the PF
netdevice rather than to the openvswitch bridge. We want to steer
the tunnel packets from the PF to openvswitch and have openvswitch
build the tunnel packets. When building the tunnel packets,
openvswitch can use the neigh entries learned from the system.

Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
---
 lib/dpif-netdev.c     |   1 +
 lib/tnl-neigh-cache.c | 312 +++++++++++++++++++++++++++++++++++++++++++++++---
 lib/tnl-neigh-cache.h |   2 +
 vswitchd/bridge.c     |   2 +
 vswitchd/vswitch.xml  |  17 +++
 5 files changed, 321 insertions(+), 13 deletions(-)

Comments

0-day Robot Dec. 14, 2020, 3:05 a.m. UTC | #1
Bleep bloop.  Greetings Tonghao Zhang, I am a robot and I have tried out your patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Line is 86 characters long (recommended limit is 79)
#451 FILE: lib/tnl-neigh-cache.c:600:
            VLOG_ERR("Can't create nln handle or notifier for neighboring subsystem");

WARNING: Line is 83 characters long (recommended limit is 79)
#517 FILE: vswitchd/vswitch.xml:228:
          Set this value to <code>true</code> to enable learning neigh from system.

WARNING: Line is 84 characters long (recommended limit is 79)
#521 FILE: vswitchd/vswitch.xml:232:
          If enabled, Open vSwitch can learn the neigh entries from system. Then you

WARNING: Line is 85 characters long (recommended limit is 79)
#523 FILE: vswitchd/vswitch.xml:234:
          when encapsulating tunnel packets(e.g. native_tunnel_output), we try to use

WARNING: Line is 92 characters long (recommended limit is 79)
#524 FILE: vswitchd/vswitch.xml:235:
          the neigh entry which learned from system. That is useful for the flow bifurcation

WARNING: Line is 83 characters long (recommended limit is 79)
#526 FILE: vswitchd/vswitch.xml:237:
          to split traffic between Linux user space and kernel space. More details:

WARNING: Line is 91 characters long (recommended limit is 79)
#527 FILE: vswitchd/vswitch.xml:238:
          http://git.dpdk.org/next/dpdk-next-net/tree/doc/guides/howto/flow_bifurcation.rst

Lines checked: 536, Warnings: 7, Errors: 0


Please check this out.  If you feel there has been an error, please email aconole@redhat.com

Thanks,
0-day Robot
diff mbox series

Patch

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 300861ca5..edc4122af 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -5804,6 +5804,7 @@  dpif_netdev_wait(struct dpif *dpif)
     ovs_mutex_unlock(&dp->port_mutex);
     ovs_mutex_unlock(&dp_netdev_mutex);
     seq_wait(tnl_conf_seq, dp->last_tnl_conf_seq);
+    tnl_neigh_cache_wait();
 }
 
 static void
diff --git a/lib/tnl-neigh-cache.c b/lib/tnl-neigh-cache.c
index 5bda4af7e..8f346ba78 100644
--- a/lib/tnl-neigh-cache.c
+++ b/lib/tnl-neigh-cache.c
@@ -22,6 +22,8 @@ 
 #include <sys/types.h>
 #include <netinet/in.h>
 #include <netinet/icmp6.h>
+#include <linux/rtnetlink.h>
+#include <linux/if_ether.h>
 #include <stdlib.h>
 
 #include "bitmap.h"
@@ -35,6 +37,7 @@ 
 #include "ovs-thread.h"
 #include "packets.h"
 #include "openvswitch/poll-loop.h"
+#include "openvswitch/ofpbuf.h"
 #include "seq.h"
 #include "socket-util.h"
 #include "timeval.h"
@@ -42,10 +45,16 @@ 
 #include "unixctl.h"
 #include "util.h"
 #include "openvswitch/vlog.h"
+#include "netlink-notifier.h"
+#include "netlink-socket.h"
+#include "netlink.h"
+#include "smap.h"
 
+VLOG_DEFINE_THIS_MODULE(tnl_neigh_cache);
 
 /* In seconds */
 #define NEIGH_ENTRY_DEFAULT_IDLE_TIME  (15 * 60)
+#define NUD_VALID (NUD_PERMANENT|NUD_REACHABLE|NUD_PROBE|NUD_STALE|NUD_DELAY)
 
 struct tnl_neigh_entry {
     struct cmap_node cmap_node;
@@ -53,10 +62,30 @@  struct tnl_neigh_entry {
     struct eth_addr mac;
     time_t expires;             /* Expiration time. */
     char br_name[IFNAMSIZ];
+    bool event;
 };
 
+enum tnl_neigh_nlmsg_op {
+    TNL_NEIGH_NLMSG_ADD = 1,
+    TNL_NEIGH_NLMSG_DEL,
+};
+
+struct tnl_neigh_nlmsg {
+    struct in6_addr ip;
+    struct eth_addr mac;
+    char br_name[IFNAMSIZ];
+    enum tnl_neigh_nlmsg_op op;
+};
+
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
 static struct cmap table = CMAP_INITIALIZER;
 static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
+static struct nln_notifier *neigh_notifier = NULL;
+static struct nln *neigh_nln = NULL;
+static struct tnl_neigh_nlmsg tnmsg;
+
+static int tnl_neigh_event_parse(struct ofpbuf *, struct tnl_neigh_nlmsg *);
+static void tnl_neigh_event_change(const struct tnl_neigh_nlmsg *, void *);
 
 static uint32_t
 tnl_neigh_hash(const struct in6_addr *ip)
@@ -72,7 +101,8 @@  tnl_neigh_lookup__(const char br_name[IFNAMSIZ], const struct in6_addr *dst)
 
     hash = tnl_neigh_hash(dst);
     CMAP_FOR_EACH_WITH_HASH (neigh, cmap_node, hash, &table) {
-        if (ipv6_addr_equals(&neigh->ip, dst) && !strcmp(neigh->br_name, br_name)) {
+        if (ipv6_addr_equals(&neigh->ip, dst) &&
+            !strcmp(neigh->br_name, br_name) && !neigh->event) {
             if (neigh->expires <= time_now()) {
                 return NULL;
             }
@@ -81,6 +111,15 @@  tnl_neigh_lookup__(const char br_name[IFNAMSIZ], const struct in6_addr *dst)
             return neigh;
         }
     }
+
+    /* To check whether neigh entry available which learned from system. */
+    CMAP_FOR_EACH_WITH_HASH (neigh, cmap_node, hash, &table) {
+        if (ipv6_addr_equals(&neigh->ip, dst) &&
+            neigh->event) {
+            return neigh;
+        }
+    }
+
     return NULL;
 }
 
@@ -114,15 +153,13 @@  tnl_neigh_delete(struct tnl_neigh_entry *neigh)
 }
 
 static void
-tnl_neigh_set__(const char name[IFNAMSIZ], const struct in6_addr *dst,
-                const struct eth_addr mac)
+tnl_neigh_set_nolock(const char name[IFNAMSIZ], const struct in6_addr *dst,
+                     const struct eth_addr mac, bool event)
 {
-    ovs_mutex_lock(&mutex);
     struct tnl_neigh_entry *neigh = tnl_neigh_lookup__(name, dst);
     if (neigh) {
         if (eth_addr_equals(neigh->mac, mac)) {
             neigh->expires = time_now() + NEIGH_ENTRY_DEFAULT_IDLE_TIME;
-            ovs_mutex_unlock(&mutex);
             return;
         }
         tnl_neigh_delete(neigh);
@@ -130,12 +167,39 @@  tnl_neigh_set__(const char name[IFNAMSIZ], const struct in6_addr *dst,
     seq_change(tnl_conf_seq);
 
     neigh = xmalloc(sizeof *neigh);
-
     neigh->ip = *dst;
     neigh->mac = mac;
+    neigh->event = event;
     neigh->expires = time_now() + NEIGH_ENTRY_DEFAULT_IDLE_TIME;
     ovs_strlcpy(neigh->br_name, name, sizeof neigh->br_name);
     cmap_insert(&table, &neigh->cmap_node, tnl_neigh_hash(&neigh->ip));
+}
+
+static void
+tnl_neigh_unset_nolock(const char name[IFNAMSIZ], const struct in6_addr *dst)
+{
+    struct tnl_neigh_entry *neigh;
+    bool changed = false;
+
+    CMAP_FOR_EACH (neigh, cmap_node, &table) {
+        if (!strcmp(neigh->br_name, name) &&
+            ipv6_addr_equals(&neigh->ip, dst) && neigh->event) {
+            tnl_neigh_delete(neigh);
+            changed = true;
+        }
+    }
+
+    if (changed) {
+        seq_change(tnl_conf_seq);
+    }
+}
+
+static void
+tnl_neigh_set__(const char name[IFNAMSIZ], const struct in6_addr *dst,
+                const struct eth_addr mac)
+{
+    ovs_mutex_lock(&mutex);
+    tnl_neigh_set_nolock(name, dst, mac, false);
     ovs_mutex_unlock(&mutex);
 }
 
@@ -208,11 +272,16 @@  tnl_neigh_cache_run(void)
 
     ovs_mutex_lock(&mutex);
     CMAP_FOR_EACH(neigh, cmap_node, &table) {
-        if (neigh->expires <= time_now()) {
+        if (!neigh->event && neigh->expires <= time_now()) {
             tnl_neigh_delete(neigh);
             changed = true;
         }
     }
+
+    if (neigh_nln) {
+        nln_run(neigh_nln);
+    }
+
     ovs_mutex_unlock(&mutex);
 
     if (changed) {
@@ -220,6 +289,16 @@  tnl_neigh_cache_run(void)
     }
 }
 
+void
+tnl_neigh_cache_wait(void)
+{
+    ovs_mutex_lock(&mutex);
+    if (neigh_nln) {
+        nln_wait(neigh_nln);
+    }
+    ovs_mutex_unlock(&mutex);
+}
+
 void
 tnl_neigh_flush(const char br_name[IFNAMSIZ])
 {
@@ -241,21 +320,29 @@  tnl_neigh_flush(const char br_name[IFNAMSIZ])
 }
 
 static void
-tnl_neigh_cache_flush(struct unixctl_conn *conn, int argc OVS_UNUSED,
-                    const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
+tnl_neigh_flush__(bool event)
 {
     struct tnl_neigh_entry *neigh;
     bool changed = false;
 
     ovs_mutex_lock(&mutex);
-    CMAP_FOR_EACH(neigh, cmap_node, &table) {
-        tnl_neigh_delete(neigh);
-        changed = true;
+    CMAP_FOR_EACH (neigh, cmap_node, &table) {
+        if (!event || neigh->event) {
+            tnl_neigh_delete(neigh);
+            changed = true;
+        }
     }
     ovs_mutex_unlock(&mutex);
     if (changed) {
         seq_change(tnl_conf_seq);
     }
+}
+
+static void
+tnl_neigh_cache_flush(struct unixctl_conn *conn, int argc OVS_UNUSED,
+                    const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
+{
+    tnl_neigh_flush__(false);
     unixctl_command_reply(conn, "OK");
 }
 
@@ -319,7 +406,7 @@  tnl_neigh_cache_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
 
         ds_put_format(&ds, ETH_ADDR_FMT"   %s",
                       ETH_ADDR_ARGS(neigh->mac), neigh->br_name);
-        if (neigh->expires <= time_now()) {
+        if (!neigh->event && neigh->expires <= time_now()) {
             ds_put_format(&ds, " STALE");
         }
         ds_put_char(&ds, '\n');
@@ -330,6 +417,205 @@  tnl_neigh_cache_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
     ds_destroy(&ds);
 }
 
+static int
+tnl_neigh_event_parse(struct ofpbuf *buf, struct tnl_neigh_nlmsg *change)
+{
+    static const struct nl_policy policy[] = {
+        [NDA_DST] = { .type = NL_A_UNSPEC,
+                      .min_len = sizeof(struct in_addr),
+                      .optional = false, },
+        [NDA_LLADDR] = { .type = NL_A_UNSPEC,
+                         .min_len = ETH_ALEN,
+                         .optional = true, },
+    };
+
+    struct nlattr *attrs[ARRAY_SIZE(policy)];
+    const struct nlmsghdr *nlmsg = buf->data;
+    const struct ndmsg *ndm;
+    char namebuf[IFNAMSIZ];
+    bool parsed;
+    struct in6_addr addr;
+
+    /* Process RTM_NEWNEIGH or RTM_DELNEIGH events only. */
+    if (nlmsg->nlmsg_type != RTM_NEWNEIGH &&
+        nlmsg->nlmsg_type != RTM_DELNEIGH) {
+        return 0;
+    }
+
+    ndm = ofpbuf_at(buf, NLMSG_HDRLEN, sizeof *ndm);
+    if (ndm->ndm_family != AF_INET &&
+        ndm->ndm_family != AF_INET6) {
+        return 0;
+    }
+
+    parsed = nl_policy_parse(buf, NLMSG_HDRLEN + sizeof(struct rtmsg),
+                             policy, attrs, ARRAY_SIZE(policy));
+    if (!parsed) {
+        VLOG_DBG_RL(&rl, "The tnl neigh event parse failed");
+        return 0;
+    }
+
+    if (!if_indextoname(ndm->ndm_ifindex, namebuf)) {
+        return 0;
+    }
+
+    memset(change, 0, sizeof *change);
+    ovs_strlcpy(change->br_name, namebuf, sizeof change->br_name);
+
+    if (ndm->ndm_family == AF_INET) {
+        const ovs_be32 *ip4;
+        ip4 = nl_attr_get_unspec(attrs[NDA_DST], sizeof *ip4);
+        addr = in6_addr_mapped_ipv4(*ip4);
+    } else {
+        const struct in6_addr *ip6;
+        ip6 = nl_attr_get_unspec(attrs[NDA_DST], sizeof *ip6);
+        addr = *ip6;
+    }
+
+    change->ip = addr;
+    change->op = TNL_NEIGH_NLMSG_DEL;
+    if (nlmsg->nlmsg_type == RTM_NEWNEIGH) {
+        /* If neigh entry was not ready,  will not cache it. */
+        if (!(ndm->ndm_state & NUD_VALID) || !attrs[NDA_LLADDR]) {
+            return 0;
+        }
+
+        const struct eth_addr *mac;
+        mac = nl_attr_get_unspec(attrs[NDA_LLADDR], ETH_ALEN);
+        change->mac = *mac;
+        change->op = TNL_NEIGH_NLMSG_ADD;
+    }
+
+    return RTNLGRP_NEIGH;
+}
+
+static void
+tnl_neigh_event_change(const struct tnl_neigh_nlmsg *change,
+                       void *aux OVS_UNUSED)
+{
+    if (!change) {
+        return;
+    }
+
+    switch (change->op) {
+        case TNL_NEIGH_NLMSG_ADD:
+            VLOG_DBG("Add neigh entry: %s "ETH_ADDR_FMT,
+                     change->br_name, ETH_ADDR_ARGS(change->mac));
+            tnl_neigh_set_nolock(change->br_name, &change->ip,
+                                 change->mac, true);
+            break;
+        case TNL_NEIGH_NLMSG_DEL:
+        {
+            char ip[INET6_ADDRSTRLEN];
+
+            ipv6_string_mapped(ip, &change->ip);
+            VLOG_DBG("Del neigh entry: %s %s", change->br_name, ip);
+            tnl_neigh_unset_nolock(change->br_name, &change->ip);
+            break;
+        }
+        default:
+            VLOG_ERR_RL(&rl, "The message ops of neigh netlink is unknown");
+            break;
+    }
+}
+
+static void
+tnl_neigh_event_uninit(void)
+{
+    if (neigh_notifier) {
+        nln_notifier_destroy(neigh_notifier);
+        neigh_notifier = NULL;
+    }
+
+    if (neigh_nln) {
+        nln_destroy(neigh_nln);
+        neigh_nln = NULL;
+    }
+}
+
+static int
+tnl_neigh_event_init(void)
+{
+    neigh_nln = nln_create(NETLINK_ROUTE,
+                           (nln_parse_func *) tnl_neigh_event_parse,
+                           &tnmsg);
+    if (!neigh_nln) {
+        return -1;
+    }
+
+    neigh_notifier =
+        nln_notifier_create(neigh_nln, RTNLGRP_NEIGH,
+                            (nln_notify_func *) tnl_neigh_event_change,
+                            NULL);
+    if (!neigh_notifier) {
+        tnl_neigh_event_uninit();
+        return -1;
+    }
+
+    return 0;
+}
+
+static int
+tnl_neigh_event_dump(void)
+{
+    uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
+    struct ofpbuf request, reply, buf;
+    struct nl_dump dump;
+    struct ndmsg *ndmsg;
+
+    ofpbuf_init(&request, 0);
+    nl_msg_put_nlmsghdr(&request, sizeof *ndmsg, RTM_GETNEIGH,
+                        NLM_F_REQUEST | NLM_F_DUMP);
+
+    ndmsg = ofpbuf_put_zeros(&request, sizeof *ndmsg);
+    ndmsg->ndm_family = AF_UNSPEC;
+
+    nl_dump_start(&dump, NETLINK_ROUTE, &request);
+    ofpbuf_uninit(&request);
+
+    ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
+    while (nl_dump_next(&dump, &reply, &buf)) {
+        struct tnl_neigh_nlmsg msg;
+
+        if (tnl_neigh_event_parse(&reply, &msg)) {
+            tnl_neigh_event_change(&msg, NULL);
+        }
+    }
+    ofpbuf_uninit(&buf);
+    return nl_dump_done(&dump);
+}
+
+void
+tnl_neigh_event_enabled(const struct smap *ovs_other_config)
+{
+    int err;
+
+    if (smap_get_bool(ovs_other_config, "tnl-neigh-event-enabled", false)) {
+        if (neigh_nln || neigh_notifier) {
+            return;
+        }
+
+        err = tnl_neigh_event_init();
+        if (err) {
+            VLOG_ERR("Can't create nln handle or notifier for neighboring subsystem");
+            return;
+        }
+
+        err = tnl_neigh_event_dump();
+        if (err) {
+            tnl_neigh_event_uninit();
+            VLOG_ERR("Can't dump neigh entries");
+            return;
+        }
+    } else {
+        if (!neigh_nln && !neigh_notifier) {
+            return;
+        }
+        tnl_neigh_flush__(true);
+        tnl_neigh_event_uninit();
+    }
+}
+
 void
 tnl_neigh_cache_init(void)
 {
diff --git a/lib/tnl-neigh-cache.h b/lib/tnl-neigh-cache.h
index ded9c2f86..f98743d06 100644
--- a/lib/tnl-neigh-cache.h
+++ b/lib/tnl-neigh-cache.h
@@ -37,6 +37,8 @@  int tnl_neigh_lookup(const char dev_name[], const struct in6_addr *dst,
                      struct eth_addr *mac);
 void tnl_neigh_cache_init(void);
 void tnl_neigh_cache_run(void);
+void tnl_neigh_cache_wait(void);
 void tnl_neigh_flush(const char dev_name[]);
+void tnl_neigh_event_enabled(const struct smap *ovs_other_config);
 
 #endif
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index 5ed7e8234..161bb5f8b 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -69,6 +69,7 @@ 
 #include "util.h"
 #include "unixctl.h"
 #include "lib/vswitch-idl.h"
+#include "tnl-neigh-cache.h"
 #include "xenserver.h"
 #include "vlan-bitmap.h"
 
@@ -3292,6 +3293,7 @@  bridge_run(void)
         netdev_set_flow_api_enabled(&cfg->other_config);
         dpdk_init(&cfg->other_config);
         userspace_tso_init(&cfg->other_config);
+        tnl_neigh_event_enabled(&cfg->other_config);
     }
 
     /* Initialize the ofproto library.  This only needs to run once, but
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 89a876796..b0f22b534 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -222,6 +222,23 @@ 
         </p>
       </column>
 
+      <column name="other_config" key="tnl-neigh-event-enabled"
+              type='{"type": "boolean"}'>
+        <p>
+          Set this value to <code>true</code> to enable learning neigh from system.
+          The default value is <code>false</code>.
+        </p>
+        <p>
+          If enabled, Open vSwitch can learn the neigh entries from system. Then you
+          may not configure tunnel IP address on Open vSwitch bridge,
+          when encapsulating tunnel packets(e.g. native_tunnel_output), we try to use
+          the neigh entry which learned from system. That is useful for the flow bifurcation
+          that is a mechanism which uses hardware capable Ethernet devices
+          to split traffic between Linux user space and kernel space. More details:
+          http://git.dpdk.org/next/dpdk-next-net/tree/doc/guides/howto/flow_bifurcation.rst
+        </p>
+      </column>
+
       <column name="other_config" key="hw-offload"
               type='{"type": "boolean"}'>
         <p>