[ovs-dev,1/8] netlink: provide network namespace id from a msg.

Message ID 20171102190509.2688-2-fbl@redhat.com
State New
Headers show
Series
  • Add minimum network namespace support.
Related show

Commit Message

Flavio Leitner Nov. 2, 2017, 7:05 p.m.
The netlink notification's ancillary data contains the network
namespace id (netnsid) needed to identify the device correctly
(by the pair of ifindex and netnsid).

Signed-off-by: Flavio Leitner <fbl@redhat.com>
---
 configure.ac           |   3 +-
 lib/automake.mk        |   1 +
 lib/dpif-netlink.c     |   6 +--
 lib/netdev-linux.c     |   2 +-
 lib/netlink-notifier.c |   2 +-
 lib/netlink-socket.c   |  37 ++++++++++++---
 lib/netlink-socket.h   |   4 +-
 lib/netns.h            | 119 +++++++++++++++++++++++++++++++++++++++++++++++++
 utilities/nlmon.c      |   2 +-
 9 files changed, 162 insertions(+), 14 deletions(-)
 create mode 100644 lib/netns.h

Comments

Ben Pfaff Nov. 3, 2017, 6:26 p.m. | #1
On Thu, Nov 02, 2017 at 05:05:02PM -0200, Flavio Leitner wrote:
> The netlink notification's ancillary data contains the network
> namespace id (netnsid) needed to identify the device correctly.
> (ifindex and netnsid).
> 
> Signed-off-by: Flavio Leitner <fbl@redhat.com>

Thanks a lot for working on this.

I did not fully review this patch, but one thing that would make me more
comfortable with cmsg handling is if the code would identify SCM_RIGHTS
cmsgs and close the fds that they contain.  I don't know currently
whether the kernel ever sends fds to userspace over netlink cmsgs, but
for unix domain socket messages sent between user processes it is risky
to accept cmsg data without closing any received fds: it makes the
receiving process prone to fd leaks.
Flavio Leitner Nov. 7, 2017, 12:42 p.m. | #2
On Fri, 3 Nov 2017 11:26:47 -0700
Ben Pfaff <blp@ovn.org> wrote:

> On Thu, Nov 02, 2017 at 05:05:02PM -0200, Flavio Leitner wrote:
> > The netlink notification's ancillary data contains the network
> > namespace id (netnsid) needed to identify the device correctly.
> > (ifindex and netnsid).
> > 
> > Signed-off-by: Flavio Leitner <fbl@redhat.com>  
> 
> Thanks a lot for working on this.
> 
> I did not fully review this patch, but one thing that would make me more
> comfortable with cmsg handling is if the code would identify SCM_RIGHTS
> cmsgs and close the fds that they contain.  I don't know currently
> whether the kernel ever sends fds to userspace over netlink cmsgs, but
> for unix domain socket messages sent between user processes it is risky
> to accept cmsg data without closing any received fds: it makes the
> receiving process prone to fd leaks.

It seems that the kernel doesn't send any fds through netlink messages,
though I'm not sure about Windows. But I agree OVS could be on the
defensive side and close any fds it finds.

Let's see if there are other comments today, otherwise I will respin
the patchset including the fix.

Thanks Ben!

Patch

diff --git a/configure.ac b/configure.ac
index 27425991a..522704f9d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -113,7 +113,8 @@  AC_CHECK_MEMBERS([struct sockaddr_in6.sin6_scope_id], [], [],
   [[#include <sys/socket.h>
 #include <netinet/in.h>]])
 AC_CHECK_FUNCS([mlockall strnlen getloadavg statvfs getmntent_r sendmmsg])
-AC_CHECK_HEADERS([mntent.h sys/statvfs.h linux/types.h linux/if_ether.h stdatomic.h])
+AC_CHECK_HEADERS([mntent.h sys/statvfs.h linux/types.h linux/if_ether.h ])
+AC_CHECK_HEADERS([linux/net_namespace.h stdatomic.h])
 AC_CHECK_HEADERS([net/if_mib.h], [], [], [[#include <sys/types.h>
 #include <net/if.h>]])
 
diff --git a/lib/automake.mk b/lib/automake.mk
index ca1cf5dd2..e354cd96f 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -140,6 +140,7 @@  lib_libopenvswitch_la_SOURCES = \
 	lib/netflow.h \
 	lib/netlink.c \
 	lib/netlink.h \
+	lib/netns.h \
 	lib/nx-match.c \
 	lib/nx-match.h \
 	lib/object-collection.c \
diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 29001fbe4..563eb5638 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -1287,7 +1287,7 @@  dpif_netlink_port_poll(const struct dpif *dpif_, char **devnamep)
         int error;
 
         ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub);
-        error = nl_sock_recv(dpif->port_notifier, &buf, false);
+        error = nl_sock_recv(dpif->port_notifier, &buf, NULL, false);
         if (!error) {
             struct dpif_netlink_vport vport;
 
@@ -2621,7 +2621,7 @@  dpif_netlink_recv_windows(struct dpif_netlink *dpif, uint32_t handler_id,
                 return EAGAIN;
             }
 
-            error = nl_sock_recv(sock_pool[i].nl_sock, buf, false);
+            error = nl_sock_recv(sock_pool[i].nl_sock, buf, NULL, false);
             if (error == ENOBUFS) {
                 /* ENOBUFS typically means that we've received so many
                  * packets that the buffer overflowed.  Try again
@@ -2696,7 +2696,7 @@  dpif_netlink_recv__(struct dpif_netlink *dpif, uint32_t handler_id,
                 return EAGAIN;
             }
 
-            error = nl_sock_recv(ch->sock, buf, false);
+            error = nl_sock_recv(ch->sock, buf, NULL, false);
             if (error == ENOBUFS) {
                 /* ENOBUFS typically means that we've received so many
                  * packets that the buffer overflowed.  Try again
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 2ff3e2bcc..0d03ca876 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -638,7 +638,7 @@  netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED)
         struct ofpbuf buf;
 
         ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub);
-        error = nl_sock_recv(sock, &buf, false);
+        error = nl_sock_recv(sock, &buf, NULL, false);
         if (!error) {
             struct rtnetlink_change change;
 
diff --git a/lib/netlink-notifier.c b/lib/netlink-notifier.c
index 3acded418..d33904658 100644
--- a/lib/netlink-notifier.c
+++ b/lib/netlink-notifier.c
@@ -187,7 +187,7 @@  nln_run(struct nln *nln)
         int error;
 
         ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub);
-        error = nl_sock_recv(nln->notify_sock, &buf, false);
+        error = nl_sock_recv(nln->notify_sock, &buf, NULL, false);
         if (!error) {
             int group = nln->parse(&buf, nln->change);
 
diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c
index ccfd55e92..08eb0c8ee 100644
--- a/lib/netlink-socket.c
+++ b/lib/netlink-socket.c
@@ -19,6 +19,7 @@ 
 #include <errno.h>
 #include <inttypes.h>
 #include <stdlib.h>
+#include <sys/socket.h>
 #include <sys/types.h>
 #include <sys/uio.h>
 #include <unistd.h>
@@ -607,7 +608,8 @@  nl_sock_send_seq(struct nl_sock *sock, const struct ofpbuf *msg,
 }
 
 static int
-nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait)
+nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, struct netns *ns,
+               bool wait)
 {
     /* We can't accurately predict the size of the data to be received.  The
      * caller is supposed to have allocated enough space in 'buf' to handle the
@@ -618,6 +620,8 @@  nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait)
     uint8_t tail[65536];
     struct iovec iov[2];
     struct msghdr msg;
+    uint8_t msgctrl[64];
+    struct cmsghdr *cmsg;
     ssize_t retval;
     int error;
 
@@ -632,6 +636,8 @@  nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait)
     memset(&msg, 0, sizeof msg);
     msg.msg_iov = iov;
     msg.msg_iovlen = 2;
+    msg.msg_control = msgctrl;
+    msg.msg_controllen = sizeof msgctrl;
 
     /* Receive a Netlink message from the kernel.
      *
@@ -706,6 +712,23 @@  nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait)
     }
 #endif
 
+    if (ns) {
+        /* The network namespace id arrives as ancillary data.  On older
+         * kernels this data is either absent or may be -1, so default to
+         * the local network namespace (no id).  Recent kernels send the
+         * ancillary data only when a valid id is available. */
+        netns_set_local(ns);
+        cmsg = CMSG_FIRSTHDR(&msg);
+        while (cmsg != NULL) {
+            if (cmsg->cmsg_level == SOL_NETLINK
+                && cmsg->cmsg_type == NETLINK_LISTEN_ALL_NSID) {
+                int *id = ALIGNED_CAST(int *,  CMSG_DATA(cmsg));
+                netns_set_id(ns, *id);
+            }
+            cmsg = CMSG_NXTHDR(&msg, cmsg);
+        }
+    }
+
     log_nlmsg(__func__, 0, buf->data, buf->size, sock->protocol);
     COVERAGE_INC(netlink_received);
 
@@ -714,7 +737,8 @@  nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait)
 
 /* Tries to receive a Netlink message from the kernel on 'sock' into 'buf'.  If
  * 'wait' is true, waits for a message to be ready.  Otherwise, fails with
- * EAGAIN if the 'sock' receive buffer is empty.
+ * EAGAIN if the 'sock' receive buffer is empty.  If 'ns' is nonnull, it is
+ * filled in with the network namespace found in the message's ancillary data.
  *
  * The caller must have initialized 'buf' with an allocation of at least
  * NLMSG_HDRLEN bytes.  For best performance, the caller should allocate enough
@@ -730,9 +754,10 @@  nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait)
  * Regardless of success or failure, this function resets 'buf''s headroom to
  * 0. */
 int
-nl_sock_recv(struct nl_sock *sock, struct ofpbuf *buf, bool wait)
+nl_sock_recv(struct nl_sock *sock, struct ofpbuf *buf, struct netns *ns,
+             bool wait)
 {
-    return nl_sock_recv__(sock, buf, wait);
+    return nl_sock_recv__(sock, buf, ns, wait);
 }
 
 static void
@@ -821,7 +846,7 @@  nl_sock_transact_multiple__(struct nl_sock *sock,
         }
 
         /* Receive a reply. */
-        error = nl_sock_recv__(sock, buf_txn->reply, false);
+        error = nl_sock_recv__(sock, buf_txn->reply, NULL, false);
         if (error) {
             if (error == EAGAIN) {
                 nl_sock_record_errors__(transactions, n, 0);
@@ -1101,7 +1126,7 @@  nl_dump_refill(struct nl_dump *dump, struct ofpbuf *buffer)
     int error;
 
     while (!buffer->size) {
-        error = nl_sock_recv__(dump->sock, buffer, false);
+        error = nl_sock_recv__(dump->sock, buffer, NULL, false);
         if (error) {
             /* The kernel never blocks providing the results of a dump, so
              * error == EAGAIN means that we've read the whole thing, and
diff --git a/lib/netlink-socket.h b/lib/netlink-socket.h
index d3cc64288..348483fad 100644
--- a/lib/netlink-socket.h
+++ b/lib/netlink-socket.h
@@ -193,6 +193,7 @@ 
 #include <stdbool.h>
 #include <stddef.h>
 #include <stdint.h>
+#include "netns.h"
 #include "openvswitch/ofpbuf.h"
 #include "ovs-atomic.h"
 #include "ovs-thread.h"
@@ -221,7 +222,8 @@  int nl_sock_unsubscribe_packets(struct nl_sock *sock);
 int nl_sock_send(struct nl_sock *, const struct ofpbuf *, bool wait);
 int nl_sock_send_seq(struct nl_sock *, const struct ofpbuf *,
                      uint32_t nlmsg_seq, bool wait);
-int nl_sock_recv(struct nl_sock *, struct ofpbuf *, bool wait);
+int nl_sock_recv(struct nl_sock *, struct ofpbuf *, struct netns *,
+                 bool wait);
 
 int nl_sock_drain(struct nl_sock *);
 
diff --git a/lib/netns.h b/lib/netns.h
new file mode 100644
index 000000000..2e8bd8b0c
--- /dev/null
+++ b/lib/netns.h
@@ -0,0 +1,119 @@ 
+/*
+ * Copyright (c) 2017 Red Hat Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NETNS_H
+#define NETNS_H 1
+
+#include <stdbool.h>
+
+#ifdef HAVE_LINUX_NET_NAMESPACE_H
+#include <linux/net_namespace.h>
+#define NETNS_NOT_ASSIGNED NETNSA_NSID_NOT_ASSIGNED
+#else
+#define NETNS_NOT_ASSIGNED -1
+#endif
+
+enum netns_state {
+    NETNS_INVALID,      /* not initialized yet */
+    NETNS_LOCAL,        /* local or not supported on older kernels */
+    NETNS_REMOTE        /* on another network namespace with valid ID */
+};
+
+struct netns {
+    enum netns_state state;
+    int id;
+};
+
+/* Prototypes */
+static inline void netns_set_id(struct netns *ns, int id);
+static inline void netns_set_invalid(struct netns *ns);
+static inline bool netns_is_invalid(struct netns *ns);
+static inline void netns_set_local(struct netns *ns);
+static inline bool netns_is_local(struct netns *ns);
+static inline bool netns_is_remote(struct netns *ns);
+static inline bool netns_eq(const struct netns *a, const struct netns *b);
+static inline void netns_copy(struct netns *dst, const struct netns *src);
+
+/* Functions */
+static inline void
+netns_set_id(struct netns *ns, int id)
+{
+    if (!ns) {
+        return;
+    }
+
+    if (id == NETNS_NOT_ASSIGNED) {
+        ns->state = NETNS_LOCAL;
+    } else {
+        ns->state = NETNS_REMOTE;
+        ns->id = id;
+    }
+}
+
+static inline void
+netns_set_invalid(struct netns *ns)
+{
+    ns->state = NETNS_INVALID;
+}
+
+static inline bool
+netns_is_invalid(struct netns *ns)
+{
+    return ns->state == NETNS_INVALID;
+}
+
+static inline void
+netns_set_local(struct netns *ns)
+{
+    ns->state = NETNS_LOCAL;
+}
+
+static inline bool
+netns_is_local(struct netns *ns)
+{
+    return (ns->state == NETNS_LOCAL);
+}
+
+static inline bool
+netns_is_remote(struct netns *ns)
+{
+    return (ns->state == NETNS_REMOTE);
+}
+
+static inline void
+netns_copy(struct netns *dst, const struct netns *src)
+{
+    if (src->state == NETNS_LOCAL || src->state == NETNS_REMOTE) {
+        *dst = *src;
+    }
+}
+
+static inline bool
+netns_eq(const struct netns *a, const struct netns *b)
+{
+    if (a->state == NETNS_LOCAL && b->state == NETNS_LOCAL) {
+        return true;
+    }
+
+    if (a->state == NETNS_REMOTE && b->state == NETNS_REMOTE &&
+        a->id == b->id) {
+        return true;
+    }
+
+    return false;
+}
+
+#endif
diff --git a/utilities/nlmon.c b/utilities/nlmon.c
index 2e16222cb..08a117b58 100644
--- a/utilities/nlmon.c
+++ b/utilities/nlmon.c
@@ -59,7 +59,7 @@  main(int argc OVS_UNUSED, char *argv[])
 
     ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub);
     for (;;) {
-        error = nl_sock_recv(sock, &buf, false);
+        error = nl_sock_recv(sock, &buf, NULL, false);
         if (error == EAGAIN) {
             /* Nothing to do. */
         } else if (error == ENOBUFS) {