diff mbox

[ovs-dev,RFC,v4,4/6] dpif-netlink: add VXLAN creation support

Message ID 20170118194515.1307-5-e@erig.me
State Superseded
Headers show

Commit Message

Eric Garver Jan. 18, 2017, 7:45 p.m. UTC
Creates VXLAN devices using rtnetlink and tunnel metadata.

Co-Authored-by: Thadeu Lima de Souza Cascardo <cascardo@redhat.com>
Co-Authored-by: Eric Garver <e@erig.me>
Signed-off-by: Eric Garver <e@erig.me>
---
 lib/dpif-netlink.c | 194 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 193 insertions(+), 1 deletion(-)

Comments

Joe Stringer Feb. 2, 2017, 10:59 p.m. UTC | #1
On 18 January 2017 at 11:45, Eric Garver <e@erig.me> wrote:
> Creates VXLAN devices using rtnetlink and tunnel metadata.
>
> Co-Authored-by: Thadeu Lima de Souza Cascardo <cascardo@redhat.com>
> Co-Authored-by: Eric Garver <e@erig.me>
> Signed-off-by: Eric Garver <e@erig.me>
> ---
>  lib/dpif-netlink.c | 194 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 193 insertions(+), 1 deletion(-)

I think that the vast majority of this code is linux-specific and
should not exist in dpif-netlink.c.

Perhaps we should add a new lib/netdev-lwt.[ch], where the .h file has
the #ifdef __linux__ logic to either declare or define the functions,
then lib/netdev-lwt.c has the implementations of these functions.  The
.h would always be added to the build in lib/automake.mk and the .c
file would only be added in the #if LINUX section.

One might even argue that it'd be tidier if the
dpif_netlink_port_{create,destroy} functions were moved here (and
renamed to something more apt).
Eric Garver Feb. 3, 2017, 8:58 p.m. UTC | #2
On Thu, Feb 02, 2017 at 02:59:42PM -0800, Joe Stringer wrote:
> On 18 January 2017 at 11:45, Eric Garver <e@erig.me> wrote:
> > Creates VXLAN devices using rtnetlink and tunnel metadata.
> >
> > Co-Authored-by: Thadeu Lima de Souza Cascardo <cascardo@redhat.com>
> > Co-Authored-by: Eric Garver <e@erig.me>
> > Signed-off-by: Eric Garver <e@erig.me>
> > ---
> >  lib/dpif-netlink.c | 194 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
> >  1 file changed, 193 insertions(+), 1 deletion(-)
> 
> I think that the vast majority of this code is linux-specific and
> should not exist in dpif-netlink.c.
> 
> Perhaps we should add a new lib/netdev-lwt.[ch], where the .h file has
> the #ifdef __linux__ logic to either declare or define the functions,
> then lib/netdev-lwt.c has the implementations of these functions.  The
> .h would always be added to the build in lib/automake.mk and the .c
> file would only be added in the #if LINUX section.

That's a nice cleanup. Thanks for the suggestion.

> One might even argue that it'd be tidier if the
> dpif_netlink_port_{create,destroy} functions were moved here (and
> renamed to something more apt).

I'll look into this as well.
diff mbox

Patch

diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 769806eadbf1..3aeb8480aa48 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -24,7 +24,9 @@ 
 #include <inttypes.h>
 #include <net/if.h>
 #include <linux/types.h>
+#include <linux/ip.h>
 #include <linux/pkt_sched.h>
+#include <linux/rtnetlink.h>
 #include <poll.h>
 #include <stdlib.h>
 #include <strings.h>
@@ -948,6 +950,194 @@  dpif_netlink_port_add_compat(struct dpif_netlink *dpif, struct netdev *netdev,
 
 }
 
+#ifdef __linux__
+
+/* Asks the kernel, over rtnetlink, to delete the network device called
+ * 'name' (RTM_DELLINK).
+ *
+ * Returns 0 on success, otherwise the error code from nl_transact(). */
+static int
+netdev_linux_destroy(const char *name)
+{
+    struct ofpbuf request;
+    int err;
+
+    ofpbuf_init(&request, 0);
+    nl_msg_put_nlmsghdr(&request, 0, RTM_DELLINK,
+                        NLM_F_REQUEST | NLM_F_ACK);
+    /* All-zero ifinfomsg: the device is identified by IFLA_IFNAME only. */
+    ofpbuf_put_zeros(&request, sizeof(struct ifinfomsg));
+    nl_msg_put_string(&request, IFLA_IFNAME, name);
+
+    /* Only the ACK status is of interest, so no reply buffer is requested.
+     * A reply handed back by nl_transact() is heap-allocated and would need
+     * ofpbuf_delete(); calling ofpbuf_uninit() on it leaks the ofpbuf. */
+    err = nl_transact(NETLINK_ROUTE, &request, NULL);
+
+    ofpbuf_uninit(&request);
+    return err;
+}
+
+/* Removes the VXLAN device 'name'.  VXLAN needs no type-specific teardown,
+ * so this simply defers to the generic link deletion helper and passes its
+ * result through unchanged. */
+static int
+netdev_vxlan_destroy(const char *name)
+{
+    int err = netdev_linux_destroy(name);
+
+    return err;
+}
+
+/*
+ * Older kernel headers may lack some or all of the IFLA_VXLAN_* attributes
+ * used below.  Provide every one of them here, using the numeric values
+ * from the kernel's linux/if_link.h, so the code builds regardless of the
+ * header version.  The threshold is the highest attribute we use
+ * (IFLA_VXLAN_COLLECT_METADATA).
+ */
+
+#ifndef IFLA_VXLAN_MAX
+#define IFLA_VXLAN_MAX 0
+#endif
+#if IFLA_VXLAN_MAX < 25
+#define IFLA_VXLAN_LEARNING 7
+#define IFLA_VXLAN_PORT 15
+#define IFLA_VXLAN_UDP_ZERO_CSUM6_RX 20
+#define IFLA_VXLAN_GBP 23
+#define IFLA_VXLAN_COLLECT_METADATA 25
+#endif
+
+/* Top-level attribute of an rtnetlink link message that is parsed here:
+ * the nested IFLA_LINKINFO container. */
+static const struct nl_policy rtlink_policy[] = {
+    [IFLA_LINKINFO] = { .type = NL_A_NESTED },
+};
+
+/* Attributes expected inside IFLA_LINKINFO: the link kind as a string and
+ * the nested, kind-specific data. */
+static const struct nl_policy linkinfo_policy[] = {
+    [IFLA_INFO_DATA] = { .type = NL_A_NESTED },
+    [IFLA_INFO_KIND] = { .type = NL_A_STRING },
+};
+
+/* Queries the kernel over rtnetlink (RTM_GETLINK) for the link called
+ * 'name' and checks that it is a device of kind 'kind' whose VXLAN settings
+ * match what 'netdev' needs: learning off, collect-metadata on, zero UDP
+ * checksums accepted on receive over IPv6, the configured destination port
+ * and, if the tunnel configuration requests it, the GBP extension.
+ *
+ * Returns 0 if the device matches, EINVAL if 'netdev' has no tunnel
+ * configuration or the device does not match, otherwise the error code
+ * from nl_transact(). */
+static int
+netdev_vxlan_verify(struct netdev *netdev, const char *name, const char *kind)
+{
+    int err;
+    struct ofpbuf request, *reply;
+    const struct netdev_tunnel_config *tnl_cfg;
+
+    static const struct nl_policy vxlan_policy[] = {
+        [IFLA_VXLAN_COLLECT_METADATA] = { .type = NL_A_U8 },
+        [IFLA_VXLAN_LEARNING] = { .type = NL_A_U8 },
+        [IFLA_VXLAN_UDP_ZERO_CSUM6_RX] = { .type = NL_A_U8 },
+        [IFLA_VXLAN_PORT] = { .type = NL_A_U16 },
+        /* Must be listed, otherwise the parser never stores the attribute
+         * and the GBP check below could not succeed.  Optional because it
+         * is a flag that is only present when GBP is enabled. */
+        [IFLA_VXLAN_GBP] = { .type = NL_A_FLAG, .optional = true },
+    };
+
+    tnl_cfg = netdev_get_tunnel_config(netdev);
+    if (!tnl_cfg) {
+        return EINVAL;
+    }
+
+    ofpbuf_init(&request, 0);
+    nl_msg_put_nlmsghdr(&request, 0, RTM_GETLINK,
+                        NLM_F_REQUEST);
+    ofpbuf_put_zeros(&request, sizeof(struct ifinfomsg));
+    nl_msg_put_string(&request, IFLA_IFNAME, name);
+
+    err = nl_transact(NETLINK_ROUTE, &request, &reply);
+    if (!err) {
+        struct nlattr *rtlink[ARRAY_SIZE(rtlink_policy)];
+        struct nlattr *linkinfo[ARRAY_SIZE(linkinfo_policy)];
+        struct nlattr *vxlan[ARRAY_SIZE(vxlan_policy)];
+
+        /* Attributes follow the netlink header and the ifinfomsg.  Peel the
+         * nesting one level at a time: link -> linkinfo -> vxlan data. */
+        if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof(struct ifinfomsg),
+                             rtlink_policy, rtlink,
+                             ARRAY_SIZE(rtlink_policy)) ||
+            !nl_parse_nested(rtlink[IFLA_LINKINFO], linkinfo_policy,
+                             linkinfo, ARRAY_SIZE(linkinfo_policy)) ||
+            strcmp(nl_attr_get_string(linkinfo[IFLA_INFO_KIND]), kind) ||
+            !nl_parse_nested(linkinfo[IFLA_INFO_DATA], vxlan_policy, vxlan,
+                             ARRAY_SIZE(vxlan_policy))) {
+            err = EINVAL;
+        }
+        if (!err) {
+            /* 'dst_port' is kept in network byte order and the attribute is
+             * written with nl_msg_put_be16(), so read it back as be16. */
+            if (0 != nl_attr_get_u8(vxlan[IFLA_VXLAN_LEARNING]) ||
+                1 != nl_attr_get_u8(vxlan[IFLA_VXLAN_COLLECT_METADATA]) ||
+                1 != nl_attr_get_u8(vxlan[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]) ||
+                tnl_cfg->dst_port
+                    != nl_attr_get_be16(vxlan[IFLA_VXLAN_PORT])) {
+                err = EINVAL;
+            }
+        }
+        if (!err) {
+            if ((tnl_cfg->exts & (1 << OVS_VXLAN_EXT_GBP)) &&
+                !(vxlan[IFLA_VXLAN_GBP] &&
+                  nl_attr_get_flag(vxlan[IFLA_VXLAN_GBP]))) {
+                err = EINVAL;
+            }
+        }
+        /* The reply is heap-allocated by nl_transact(); ofpbuf_delete()
+         * frees both its data and the ofpbuf itself. */
+        ofpbuf_delete(reply);
+    }
+    ofpbuf_uninit(&request);
+    return err;
+}
+
+/* Creates a link of kind 'kind' for 'netdev' with an RTM_NEWLINK request.
+ * The device is brought up, named after the netdev's datapath port name,
+ * and configured with learning off, collect-metadata on, zero UDP checksums
+ * accepted on receive over IPv6, the tunnel's destination port and, when
+ * requested by the tunnel configuration, the GBP extension.
+ *
+ * After a successful request the resulting device is re-read and checked
+ * with netdev_vxlan_verify(); if it does not match, it is deleted again.
+ *
+ * Returns 0 on success, EINVAL if 'netdev' has no tunnel configuration,
+ * otherwise the error code from the netlink transaction or from the
+ * verification. */
+static int
+netdev_vxlan_create_kind(struct netdev *netdev, const char *kind)
+{
+    int err;
+    struct ofpbuf request;
+    size_t linkinfo_off, infodata_off;
+    char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
+    const char *name = netdev_vport_get_dpif_port(netdev,
+                                                  namebuf, sizeof namebuf);
+    struct ifinfomsg *ifinfo;
+    const struct netdev_tunnel_config *tnl_cfg;
+
+    tnl_cfg = netdev_get_tunnel_config(netdev);
+    if (!tnl_cfg) {
+        return EINVAL;
+    }
+
+    ofpbuf_init(&request, 0);
+    nl_msg_put_nlmsghdr(&request, 0, RTM_NEWLINK,
+                        NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE);
+    ifinfo = ofpbuf_put_zeros(&request, sizeof(struct ifinfomsg));
+    /* Ask for the interface to be created administratively up. */
+    ifinfo->ifi_change = ifinfo->ifi_flags = IFF_UP;
+    nl_msg_put_string(&request, IFLA_IFNAME, name);
+    nl_msg_put_u32(&request, IFLA_MTU, UINT16_MAX);
+    linkinfo_off = nl_msg_start_nested(&request, IFLA_LINKINFO);
+        nl_msg_put_string(&request, IFLA_INFO_KIND, kind);
+        infodata_off = nl_msg_start_nested(&request, IFLA_INFO_DATA);
+            nl_msg_put_u8(&request, IFLA_VXLAN_LEARNING, 0);
+            nl_msg_put_u8(&request, IFLA_VXLAN_COLLECT_METADATA, 1);
+            nl_msg_put_u8(&request, IFLA_VXLAN_UDP_ZERO_CSUM6_RX, 1);
+            if (tnl_cfg->exts & (1 << OVS_VXLAN_EXT_GBP)) {
+                nl_msg_put_flag(&request, IFLA_VXLAN_GBP);
+            }
+            nl_msg_put_be16(&request, IFLA_VXLAN_PORT, tnl_cfg->dst_port);
+        nl_msg_end_nested(&request, infodata_off);
+    nl_msg_end_nested(&request, linkinfo_off);
+
+    /* Only the ACK status is needed, so no reply buffer is requested.  A
+     * reply from nl_transact() is heap-allocated; releasing it with
+     * ofpbuf_uninit() would leak the ofpbuf. */
+    err = nl_transact(NETLINK_ROUTE, &request, NULL);
+    /* 'name' does not point into 'request', so it can be released now. */
+    ofpbuf_uninit(&request);
+
+    /* The kernel may accept the request yet not honour every attribute, so
+     * re-read the device and undo the creation if it does not match. */
+    if (!err && (err = netdev_vxlan_verify(netdev, name, kind))) {
+        netdev_vxlan_destroy(name);
+    }
+
+    return err;
+}
+
+/* Creates the VXLAN device backing 'netdev' by invoking the kind-based
+ * creation helper with the "vxlan" link kind; its result is returned
+ * unchanged. */
+static int
+netdev_vxlan_create(struct netdev *netdev)
+{
+    int err = netdev_vxlan_create_kind(netdev, "vxlan");
+
+    return err;
+}
+
+#else
+
+/* Stub used when __linux__ is not defined: VXLAN device creation is not
+ * available, so always report EOPNOTSUPP. */
+static int
+netdev_vxlan_create(struct netdev *netdev OVS_UNUSED)
+{
+    return EOPNOTSUPP;
+}
+
+/* Stub used when __linux__ is not defined: VXLAN device removal is not
+ * available, so always report EOPNOTSUPP. */
+static int
+netdev_vxlan_destroy(const char *name OVS_UNUSED)
+{
+    return EOPNOTSUPP;
+}
+
+#endif
+
 static int
 dpif_netlink_port_query__(const struct dpif_netlink *dpif, odp_port_t port_no,
                           const char *port_name, struct dpif_port *dpif_port);
@@ -957,6 +1147,7 @@  dpif_netlink_port_create(struct netdev *netdev)
 {
     switch (netdev_to_ovs_vport_type(netdev_get_type(netdev))) {
     case OVS_VPORT_TYPE_VXLAN:
+        return netdev_vxlan_create(netdev);
     case OVS_VPORT_TYPE_GRE:
     case OVS_VPORT_TYPE_GENEVE:
     case OVS_VPORT_TYPE_NETDEV:
@@ -972,10 +1163,11 @@  dpif_netlink_port_create(struct netdev *netdev)
 }
 
 static int
-dpif_netlink_port_destroy(const char *name OVS_UNUSED, const char *type)
+dpif_netlink_port_destroy(const char *name, const char *type)
 {
     switch (netdev_to_ovs_vport_type(type)) {
     case OVS_VPORT_TYPE_VXLAN:
+        return netdev_vxlan_destroy(name);
     case OVS_VPORT_TYPE_GRE:
     case OVS_VPORT_TYPE_GENEVE:
     case OVS_VPORT_TYPE_NETDEV: