diff mbox series

[ovs-dev,V1,3/4] Fix tap interface statistics issue

Message ID 20200817012745.160200-4-yang_y_yi@163.com
State Changes Requested
Headers show
Series Enable pmd to support for system interfaces | expand

Commit Message

yang_y_yi Aug. 17, 2020, 1:27 a.m. UTC
From: Yi Yang <yangyi01@inspur.com>

After tap interface is moved to network namespace,
"ovs-vsctl list interface tapXXX" can get statistics
info of tap interface, the root cause is OVS still
gets statistics info in root namespace.

With netns option help, OVS can get statistics info
in tap interface netns.

This patch added enter and exit netns helpers and
change statistics-related functions for those tap
interfaces which have been moved into netns and
make sure "ovs-vsctl list interface tapXXX" can
get statistics info correctly.

Here is a result sample for reference:
name                : tap1
ofport              : 4
ofport_request      : []
options             : {netns=ns01}
other_config        : {}
statistics          : {rx_bytes=6228, rx_packets=68, tx_bytes=8310, tx_packets=95}
status              : {}
type                : tap

Signed-off-by: Yi Yang <yangyi01@inspur.com>
---
 lib/dpif-netlink.c   |  51 ++++++++++++++++++
 lib/dpif-netlink.h   |   3 ++
 lib/netdev-linux.c   |  60 ++++++++++++++++++++-
 lib/netlink-socket.c | 146 +++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/netlink-socket.h |   2 +
 5 files changed, 260 insertions(+), 2 deletions(-)

Comments

Aaron Conole Aug. 25, 2020, 3:05 p.m. UTC | #1
yang_y_yi@163.com writes:

> From: Yi Yang <yangyi01@inspur.com>
>
> After tap interface is moved to network namespace,
> "ovs-vsctl list interface tapXXX" can get statistics
> info of tap interface, the root cause is OVS still
> gets statistics info in root namespace.
>
> With netns option help, OVS can get statistics info
> in tap interface netns.
>
> This patch added enter and exit netns helpers and
> change statistics-related functions for those tap
> interfaces which have been moved into netns and
> make sure "ovs-vsctl list interface tapXXX" can
> get statistics info correctly.
>
> Here is a result sample for reference:
> name                : tap1
> ofport              : 4
> ofport_request      : []
> options             : {netns=ns01}
> other_config        : {}
> statistics          : {rx_bytes=6228, rx_packets=68, tx_bytes=8310, tx_packets=95}
> status              : {}
> type                : tap
>
> Signed-off-by: Yi Yang <yangyi01@inspur.com>
> ---
>  lib/dpif-netlink.c   |  51 ++++++++++++++++++
>  lib/dpif-netlink.h   |   3 ++
>  lib/netdev-linux.c   |  60 ++++++++++++++++++++-
>  lib/netlink-socket.c | 146 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  lib/netlink-socket.h |   2 +
>  5 files changed, 260 insertions(+), 2 deletions(-)
>
> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
> index 7da4fb5..8ed37ed 100644
> --- a/lib/dpif-netlink.c
> +++ b/lib/dpif-netlink.c
> @@ -4282,6 +4282,43 @@ dpif_netlink_vport_transact(const struct dpif_netlink_vport *request,
>      return error;
>  }
>  
> +static int
> +dpif_netlink_vport_transact_nopool(const struct dpif_netlink_vport *request,
> +                                   struct dpif_netlink_vport *reply,
> +                                   struct ofpbuf **bufp)
> +{
> +    struct ofpbuf *request_buf;
> +    int error;
> +
> +    ovs_assert((reply != NULL) == (bufp != NULL));
> +
> +    error = dpif_netlink_init();
> +    if (error) {
> +        if (reply) {
> +            *bufp = NULL;
> +            dpif_netlink_vport_init(reply);
> +        }
> +        return error;
> +    }
> +
> +    request_buf = ofpbuf_new(1024);
> +    dpif_netlink_vport_to_ofpbuf(request, request_buf);
> +    error = nl_transact_nopool(NETLINK_GENERIC, request_buf, bufp);
> +    ofpbuf_delete(request_buf);
> +
> +    if (reply) {
> +        if (!error) {
> +            error = dpif_netlink_vport_from_ofpbuf(reply, *bufp);
> +        }
> +        if (error) {
> +            dpif_netlink_vport_init(reply);
> +            ofpbuf_delete(*bufp);
> +            *bufp = NULL;
> +        }
> +    }
> +    return error;
> +}
> +
>  /* Obtains information about the kernel vport named 'name' and stores it into
>   * '*reply' and '*bufp'.  The caller must free '*bufp' when the reply is no
>   * longer needed ('reply' will contain pointers into '*bufp').  */
> @@ -4298,6 +4335,20 @@ dpif_netlink_vport_get(const char *name, struct dpif_netlink_vport *reply,
>      return dpif_netlink_vport_transact(&request, reply, bufp);
>  }
>  
> +int
> +dpif_netlink_vport_get_nopool(const char *name,
> +                              struct dpif_netlink_vport *reply,
> +                              struct ofpbuf **bufp)
> +{
> +    struct dpif_netlink_vport request;
> +
> +    dpif_netlink_vport_init(&request);
> +    request.cmd = OVS_VPORT_CMD_GET;
> +    request.name = name;
> +
> +    return dpif_netlink_vport_transact_nopool(&request, reply, bufp);
> +}
> +
>  /* Parses the contents of 'buf', which contains a "struct ovs_header" followed
>   * by Netlink attributes, into 'dp'.  Returns 0 if successful, otherwise a
>   * positive errno value.
> diff --git a/lib/dpif-netlink.h b/lib/dpif-netlink.h
> index 24294bc..9372241 100644
> --- a/lib/dpif-netlink.h
> +++ b/lib/dpif-netlink.h
> @@ -55,6 +55,9 @@ int dpif_netlink_vport_transact(const struct dpif_netlink_vport *request,
>                                  struct ofpbuf **bufp);
>  int dpif_netlink_vport_get(const char *name, struct dpif_netlink_vport *reply,
>                             struct ofpbuf **bufp);
> +int dpif_netlink_vport_get_nopool(const char *name,
> +                                  struct dpif_netlink_vport *reply,
> +                                  struct ofpbuf **bufp);
>  
>  bool dpif_netlink_is_internal_device(const char *name);
>  
> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> index bb3aa9b..2a7d5ec 100644
> --- a/lib/netdev-linux.c
> +++ b/lib/netdev-linux.c
> @@ -520,8 +520,16 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>   * changes in the device miimon status, so we can use atomic_count. */
>  static atomic_count miimon_cnt = ATOMIC_COUNT_INIT(0);
>  
> +struct netns_knob {
> +    int main_fd;
> +    int netns_fd;
> +    char nspath[128];
> +};
> +
>  static int netdev_linux_parse_vnet_hdr(struct dp_packet *b);
>  static void netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu);
> +static int enter_netns(struct netns_knob *netns_knob, const char *netns);
> +static void exit_netns(struct netns_knob *netns_knob);
>  static int netdev_linux_do_ethtool(const char *name, struct ethtool_cmd *,
>                                     int cmd, const char *cmd_name);
>  static int get_flags(const struct netdev *, unsigned int *flags);
> @@ -2158,11 +2166,18 @@ netdev_stats_from_ovs_vport_stats(struct netdev_stats *dst,
>  static int
>  get_stats_via_vport__(const struct netdev *netdev, struct netdev_stats *stats)
>  {
> +    struct netdev_linux *netdev_linux = netdev_linux_cast(netdev);
>      struct dpif_netlink_vport reply;
>      struct ofpbuf *buf;
>      int error;
>  
> -    error = dpif_netlink_vport_get(netdev_get_name(netdev), &reply, &buf);
> +    if (is_tap_netdev(netdev) && (netdev_linux->netns != NULL)) {
> +        error = dpif_netlink_vport_get_nopool(netdev_get_name(netdev),
> +                                              &reply, &buf);
> +    } else {
> +        error = dpif_netlink_vport_get(netdev_get_name(netdev),
> +                                              &reply, &buf);
> +    }
>      if (error) {
>          return error;
>      } else if (!reply.stats) {
> @@ -2237,6 +2252,33 @@ netdev_linux_get_stats(const struct netdev *netdev_,
>      return error;
>  }
>  
> +static int
> +enter_netns(struct netns_knob *netns_knob, const char *netns)
> +{
> +    sprintf(netns_knob->nspath, "/proc/%d/ns/net", getpid());
> +    netns_knob->main_fd = open(netns_knob->nspath, O_RDONLY);
> +    if (netns_knob->main_fd < 0) {
> +        return errno;
> +    }
> +
> +    sprintf(netns_knob->nspath, "/var/run/netns/%s", netns);
> +    netns_knob->netns_fd = open(netns_knob->nspath, O_RDONLY);
> +    if (netns_knob->netns_fd < 0) {
> +        close(netns_knob->main_fd);
> +        return errno;
> +    }
> +    setns(netns_knob->netns_fd, 0);

I think this reassociates the entire thread with the namespace, right?
So whatever thread calls this will be moved to the netns.

But actually, that 0 type could also change to a mount / pid namespace as
well.  I think it should just be CLONE_NEWNET here and in the exit_netns
call.  That way we don't get some additional changes that are unexpected
(like cgroup or something).  We expect that the FD should refer to a
netns type.

> +    return 0;
> +}
> +
> +static void
> +exit_netns(struct netns_knob *netns_knob)
> +{
> +    setns(netns_knob->main_fd, 0);
> +    close(netns_knob->netns_fd);
> +    close(netns_knob->main_fd);
> +}
> +
>  /* Retrieves current device stats for 'netdev-tap' netdev or
>   * netdev-internal. */
>  static int
> @@ -2245,6 +2287,11 @@ netdev_tap_get_stats(const struct netdev *netdev_, struct netdev_stats *stats)
>      struct netdev_linux *netdev = netdev_linux_cast(netdev_);
>      struct netdev_stats dev_stats;
>      int error;
> +    struct netns_knob netns_knob;
> +
> +    if (is_tap_netdev(netdev_) && (netdev->netns != NULL)) {
> +        error = enter_netns(&netns_knob, netdev->netns);

Shouldn't we handle this error?  Otherwise why even do the assignment?

> +    }
>  
>      ovs_mutex_lock(&netdev->mutex);
>      get_stats_via_vport(netdev_, stats);
> @@ -2298,6 +2345,10 @@ netdev_tap_get_stats(const struct netdev *netdev_, struct netdev_stats *stats)
>      stats->rx_dropped += netdev->rx_dropped;
>      ovs_mutex_unlock(&netdev->mutex);
>  
> +    if (is_tap_netdev(netdev_) && (netdev->netns != NULL)) {
> +        exit_netns(&netns_knob);

Because if the above fails, this will call setns() on an invalid FD.
And then I think the thread will be in a bad state.

> +    }
> +
>      return error;
>  }
>  
> @@ -6245,6 +6296,7 @@ netdev_stats_from_rtnl_link_stats64(struct netdev_stats *dst,
>  int
>  get_stats_via_netlink(const struct netdev *netdev_, struct netdev_stats *stats)
>  {
> +    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
>      struct ofpbuf request;
>      struct ofpbuf *reply;
>      int error;
> @@ -6258,7 +6310,11 @@ get_stats_via_netlink(const struct netdev *netdev_, struct netdev_stats *stats)
>                          RTM_GETLINK, NLM_F_REQUEST);
>      ofpbuf_put_zeros(&request, sizeof(struct ifinfomsg));
>      nl_msg_put_string(&request, IFLA_IFNAME, netdev_get_name(netdev_));
> -    error = nl_transact(NETLINK_ROUTE, &request, &reply);
> +    if (is_tap_netdev(netdev_) && (netdev->netns != NULL)) {
> +        error = nl_transact_nopool(NETLINK_ROUTE, &request, &reply);
> +    } else {
> +        error = nl_transact(NETLINK_ROUTE, &request, &reply);
> +    }
>      ofpbuf_uninit(&request);
>      if (error) {
>          return error;
> diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c
> index 47077e9..2377ca7 100644
> --- a/lib/netlink-socket.c
> +++ b/lib/netlink-socket.c
> @@ -1807,6 +1807,152 @@ nl_transact(int protocol, const struct ofpbuf *request,
>      return error;
>  }
>  
> +static int
> +nl_sock_create_nopool(int protocol, struct nl_sock **sockp)
> +{
> +    struct nl_sock *sock;
> +#ifndef _WIN32
> +    struct sockaddr_nl local, remote;
> +#endif
> +    socklen_t local_size;
> +    int rcvbuf;
> +    int retval = 0;
> +
> +    *sockp = NULL;
> +    sock = xmalloc(sizeof *sock);
> +
> +#ifdef _WIN32
> +    sock->overlapped.hEvent = NULL;
> +    sock->handle = CreateFile(OVS_DEVICE_NAME_USER,
> +                              GENERIC_READ | GENERIC_WRITE,
> +                              FILE_SHARE_READ | FILE_SHARE_WRITE,
> +                              NULL, OPEN_EXISTING,
> +                              FILE_FLAG_OVERLAPPED, NULL);
> +
> +    if (sock->handle == INVALID_HANDLE_VALUE) {
> +        VLOG_ERR("fcntl: %s", ovs_lasterror_to_string());
> +        goto error;
> +    }
> +
> +    memset(&sock->overlapped, 0, sizeof sock->overlapped);
> +    sock->overlapped.hEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
> +    if (sock->overlapped.hEvent == NULL) {
> +        VLOG_ERR("fcntl: %s", ovs_lasterror_to_string());
> +        goto error;
> +    }
> +    /* Initialize the type/ioctl to Generic */
> +    sock->read_ioctl = OVS_IOCTL_READ;
> +#else
> +    sock->fd = socket(AF_NETLINK, SOCK_RAW, protocol);
> +    if (sock->fd < 0) {
> +        VLOG_ERR("fcntl: %s", ovs_strerror(errno));
> +        goto error;
> +    }
> +#endif
> +
> +    sock->protocol = protocol;
> +    sock->next_seq = 1;
> +
> +    rcvbuf = 1024 * 1024;
> +#ifdef _WIN32
> +    sock->rcvbuf = rcvbuf;
> +    retval = get_sock_pid_from_kernel(sock);
> +    if (retval != 0) {
> +        goto error;
> +    }
> +    retval = set_sock_property(sock);
> +    if (retval != 0) {
> +        goto error;
> +    }
> +#else
> +    if (setsockopt(sock->fd, SOL_SOCKET, SO_RCVBUFFORCE,
> +                   &rcvbuf, sizeof rcvbuf)) {
> +        /* Only root can use SO_RCVBUFFORCE.  Everyone else gets EPERM.
> +         * Warn only if the failure is therefore unexpected. */
> +        if (errno != EPERM) {
> +            VLOG_WARN_RL(&rl, "setting %d-byte socket receive buffer failed "
> +                         "(%s)", rcvbuf, ovs_strerror(errno));
> +        }
> +    }
> +
> +    retval = get_socket_rcvbuf(sock->fd);
> +    if (retval < 0) {
> +        retval = -retval;
> +        goto error;
> +    }
> +    sock->rcvbuf = retval;
> +    retval = 0;
> +
> +    /* Connect to kernel (pid 0) as remote address. */
> +    memset(&remote, 0, sizeof remote);
> +    remote.nl_family = AF_NETLINK;
> +    remote.nl_pid = 0;
> +    if (connect(sock->fd, (struct sockaddr *) &remote, sizeof remote) < 0) {
> +        VLOG_ERR("connect(0): %s", ovs_strerror(errno));
> +        goto error;
> +    }
> +
> +    /* Obtain pid assigned by kernel. */
> +    local_size = sizeof local;
> +    if (getsockname(sock->fd, (struct sockaddr *) &local, &local_size) < 0) {
> +        VLOG_ERR("getsockname: %s", ovs_strerror(errno));
> +        goto error;
> +    }
> +    if (local_size < sizeof local || local.nl_family != AF_NETLINK) {
> +        VLOG_ERR("getsockname returned bad Netlink name");
> +        retval = EINVAL;
> +        goto error;
> +    }
> +    sock->pid = local.nl_pid;
> +#endif
> +
> +    *sockp = sock;
> +    return 0;
> +
> +error:
> +    if (retval == 0) {
> +        retval = errno;
> +        if (retval == 0) {
> +            retval = EINVAL;
> +        }
> +    }
> +#ifdef _WIN32
> +    if (sock->overlapped.hEvent) {
> +        CloseHandle(sock->overlapped.hEvent);
> +    }
> +    if (sock->handle != INVALID_HANDLE_VALUE) {
> +        CloseHandle(sock->handle);
> +    }
> +#else
> +    if (sock->fd >= 0) {
> +        close(sock->fd);
> +    }
> +#endif
> +    free(sock);
> +    return retval;
> +}
> +
> +int
> +nl_transact_nopool(int protocol, const struct ofpbuf *request,
> +            struct ofpbuf **replyp)
> +{
> +    struct nl_sock *sock;
> +    int error;
> +
> +    error = nl_sock_create_nopool(protocol, &sock);
> +    if (error) {
> +        if (replyp) {
> +            *replyp = NULL;
> +        }
> +        return error;
> +    }
> +
> +    error = nl_sock_transact(sock, request, replyp);
> +    nl_sock_destroy(sock);
> +
> +    return error;
> +}
> +
>  /* Sends the 'request' member of the 'n' transactions in 'transactions' on a
>   * Netlink socket for the given 'protocol' (e.g. NETLINK_ROUTE or
>   * NETLINK_GENERIC), in order, and receives responses to all of them.  Fills in
> diff --git a/lib/netlink-socket.h b/lib/netlink-socket.h
> index 7852ad0..08cbab8 100644
> --- a/lib/netlink-socket.h
> +++ b/lib/netlink-socket.h
> @@ -254,6 +254,8 @@ struct nl_transaction {
>  int nl_transact(int protocol, const struct ofpbuf *request,
>                  struct ofpbuf **replyp);
>  void nl_transact_multiple(int protocol, struct nl_transaction **, size_t n);
> +int nl_transact_nopool(int protocol, const struct ofpbuf *request,
> +                       struct ofpbuf **replyp);
>  
>  /* Table dumping. */
>  #define NL_DUMP_BUFSIZE         4096
Yi Yang (杨燚)-云服务集团 Aug. 26, 2020, 12:53 a.m. UTC | #2
Aaron, thank you so much for review, yes, I should handle error. For setns, I will check if it can work as you commented. I'll send V2 to fix your comments.

-----邮件原件-----
发件人: dev [mailto:ovs-dev-bounces@openvswitch.org] 代表 Aaron Conole
发送时间: 2020年8月25日 23:06
收件人: yang_y_yi@163.com
抄送: ovs-dev@openvswitch.org; i.maximets@ovn.org; fbl@sysclose.org
主题: Re: [ovs-dev] [PATCH V1 3/4] Fix tap interface statistics issue

yang_y_yi@163.com writes:

> From: Yi Yang <yangyi01@inspur.com>
>
> After tap interface is moved to network namespace, "ovs-vsctl list 
> interface tapXXX" can get statistics info of tap interface, the root 
> cause is OVS still gets statistics info in root namespace.
>
> With netns option help, OVS can get statistics info in tap interface 
> netns.
>
> This patch added enter and exit netns helpers and change 
> statistics-related functions for those tap interfaces which have been 
> moved into netns and make sure "ovs-vsctl list interface tapXXX" can 
> get statistics info correctly.
>
> Here is a result sample for reference:
> name                : tap1
> ofport              : 4
> ofport_request      : []
> options             : {netns=ns01}
> other_config        : {}
> statistics          : {rx_bytes=6228, rx_packets=68, tx_bytes=8310, tx_packets=95}
> status              : {}
> type                : tap
>
> Signed-off-by: Yi Yang <yangyi01@inspur.com>
> ---
>  lib/dpif-netlink.c   |  51 ++++++++++++++++++
>  lib/dpif-netlink.h   |   3 ++
>  lib/netdev-linux.c   |  60 ++++++++++++++++++++-
>  lib/netlink-socket.c | 146 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  lib/netlink-socket.h |   2 +
>  5 files changed, 260 insertions(+), 2 deletions(-)
>
> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index 
> 7da4fb5..8ed37ed 100644
> --- a/lib/dpif-netlink.c
> +++ b/lib/dpif-netlink.c
> @@ -4282,6 +4282,43 @@ dpif_netlink_vport_transact(const struct dpif_netlink_vport *request,
>      return error;
>  }
>  
> +static int
> +dpif_netlink_vport_transact_nopool(const struct dpif_netlink_vport *request,
> +                                   struct dpif_netlink_vport *reply,
> +                                   struct ofpbuf **bufp) {
> +    struct ofpbuf *request_buf;
> +    int error;
> +
> +    ovs_assert((reply != NULL) == (bufp != NULL));
> +
> +    error = dpif_netlink_init();
> +    if (error) {
> +        if (reply) {
> +            *bufp = NULL;
> +            dpif_netlink_vport_init(reply);
> +        }
> +        return error;
> +    }
> +
> +    request_buf = ofpbuf_new(1024);
> +    dpif_netlink_vport_to_ofpbuf(request, request_buf);
> +    error = nl_transact_nopool(NETLINK_GENERIC, request_buf, bufp);
> +    ofpbuf_delete(request_buf);
> +
> +    if (reply) {
> +        if (!error) {
> +            error = dpif_netlink_vport_from_ofpbuf(reply, *bufp);
> +        }
> +        if (error) {
> +            dpif_netlink_vport_init(reply);
> +            ofpbuf_delete(*bufp);
> +            *bufp = NULL;
> +        }
> +    }
> +    return error;
> +}
> +
>  /* Obtains information about the kernel vport named 'name' and stores it into
>   * '*reply' and '*bufp'.  The caller must free '*bufp' when the reply is no
>   * longer needed ('reply' will contain pointers into '*bufp').  */ @@ 
> -4298,6 +4335,20 @@ dpif_netlink_vport_get(const char *name, struct dpif_netlink_vport *reply,
>      return dpif_netlink_vport_transact(&request, reply, bufp);  }
>  
> +int
> +dpif_netlink_vport_get_nopool(const char *name,
> +                              struct dpif_netlink_vport *reply,
> +                              struct ofpbuf **bufp) {
> +    struct dpif_netlink_vport request;
> +
> +    dpif_netlink_vport_init(&request);
> +    request.cmd = OVS_VPORT_CMD_GET;
> +    request.name = name;
> +
> +    return dpif_netlink_vport_transact_nopool(&request, reply, bufp); 
> +}
> +
>  /* Parses the contents of 'buf', which contains a "struct ovs_header" followed
>   * by Netlink attributes, into 'dp'.  Returns 0 if successful, otherwise a
>   * positive errno value.
> diff --git a/lib/dpif-netlink.h b/lib/dpif-netlink.h index 
> 24294bc..9372241 100644
> --- a/lib/dpif-netlink.h
> +++ b/lib/dpif-netlink.h
> @@ -55,6 +55,9 @@ int dpif_netlink_vport_transact(const struct dpif_netlink_vport *request,
>                                  struct ofpbuf **bufp);  int 
> dpif_netlink_vport_get(const char *name, struct dpif_netlink_vport *reply,
>                             struct ofpbuf **bufp);
> +int dpif_netlink_vport_get_nopool(const char *name,
> +                                  struct dpif_netlink_vport *reply,
> +                                  struct ofpbuf **bufp);
>  
>  bool dpif_netlink_is_internal_device(const char *name);
>  
> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 
> bb3aa9b..2a7d5ec 100644
> --- a/lib/netdev-linux.c
> +++ b/lib/netdev-linux.c
> @@ -520,8 +520,16 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>   * changes in the device miimon status, so we can use atomic_count. 
> */  static atomic_count miimon_cnt = ATOMIC_COUNT_INIT(0);
>  
> +struct netns_knob {
> +    int main_fd;
> +    int netns_fd;
> +    char nspath[128];
> +};
> +
>  static int netdev_linux_parse_vnet_hdr(struct dp_packet *b);  static 
> void netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu);
> +static int enter_netns(struct netns_knob *netns_knob, const char 
> +*netns); static void exit_netns(struct netns_knob *netns_knob);
>  static int netdev_linux_do_ethtool(const char *name, struct ethtool_cmd *,
>                                     int cmd, const char *cmd_name);  
> static int get_flags(const struct netdev *, unsigned int *flags); @@ 
> -2158,11 +2166,18 @@ netdev_stats_from_ovs_vport_stats(struct 
> netdev_stats *dst,  static int  get_stats_via_vport__(const struct 
> netdev *netdev, struct netdev_stats *stats)  {
> +    struct netdev_linux *netdev_linux = netdev_linux_cast(netdev);
>      struct dpif_netlink_vport reply;
>      struct ofpbuf *buf;
>      int error;
>  
> -    error = dpif_netlink_vport_get(netdev_get_name(netdev), &reply, &buf);
> +    if (is_tap_netdev(netdev) && (netdev_linux->netns != NULL)) {
> +        error = dpif_netlink_vport_get_nopool(netdev_get_name(netdev),
> +                                              &reply, &buf);
> +    } else {
> +        error = dpif_netlink_vport_get(netdev_get_name(netdev),
> +                                              &reply, &buf);
> +    }
>      if (error) {
>          return error;
>      } else if (!reply.stats) {
> @@ -2237,6 +2252,33 @@ netdev_linux_get_stats(const struct netdev *netdev_,
>      return error;
>  }
>  
> +static int
> +enter_netns(struct netns_knob *netns_knob, const char *netns) {
> +    sprintf(netns_knob->nspath, "/proc/%d/ns/net", getpid());
> +    netns_knob->main_fd = open(netns_knob->nspath, O_RDONLY);
> +    if (netns_knob->main_fd < 0) {
> +        return errno;
> +    }
> +
> +    sprintf(netns_knob->nspath, "/var/run/netns/%s", netns);
> +    netns_knob->netns_fd = open(netns_knob->nspath, O_RDONLY);
> +    if (netns_knob->netns_fd < 0) {
> +        close(netns_knob->main_fd);
> +        return errno;
> +    }
> +    setns(netns_knob->netns_fd, 0);

I think this reassociates the entire thread with the namespace, right?
So whatever thread calls this will be moved to the netns.

But actually, that 0 type could also change to a mount / pid namespace as well.  I think it should just be CLONE_NEWNET here and in the exit_netns call.  That way we don't get some additional changes that are unexpected (like cgroup or something).  We expect that the FD should refer to a netns type.

> +    return 0;
> +}
> +
> +static void
> +exit_netns(struct netns_knob *netns_knob) {
> +    setns(netns_knob->main_fd, 0);
> +    close(netns_knob->netns_fd);
> +    close(netns_knob->main_fd);
> +}
> +
>  /* Retrieves current device stats for 'netdev-tap' netdev or
>   * netdev-internal. */
>  static int
> @@ -2245,6 +2287,11 @@ netdev_tap_get_stats(const struct netdev *netdev_, struct netdev_stats *stats)
>      struct netdev_linux *netdev = netdev_linux_cast(netdev_);
>      struct netdev_stats dev_stats;
>      int error;
> +    struct netns_knob netns_knob;
> +
> +    if (is_tap_netdev(netdev_) && (netdev->netns != NULL)) {
> +        error = enter_netns(&netns_knob, netdev->netns);

Shouldn't we handle this error?  Otherwise why even do the assignment?

> +    }
>  
>      ovs_mutex_lock(&netdev->mutex);
>      get_stats_via_vport(netdev_, stats); @@ -2298,6 +2345,10 @@ 
> netdev_tap_get_stats(const struct netdev *netdev_, struct netdev_stats *stats)
>      stats->rx_dropped += netdev->rx_dropped;
>      ovs_mutex_unlock(&netdev->mutex);
>  
> +    if (is_tap_netdev(netdev_) && (netdev->netns != NULL)) {
> +        exit_netns(&netns_knob);

Because if the above fails, this will call setns() on an invalid FD.
And then I think the thread will be in a bad state.

> +    }
> +
>      return error;
>  }
>  
> @@ -6245,6 +6296,7 @@ netdev_stats_from_rtnl_link_stats64(struct 
> netdev_stats *dst,  int  get_stats_via_netlink(const struct netdev 
> *netdev_, struct netdev_stats *stats)  {
> +    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
>      struct ofpbuf request;
>      struct ofpbuf *reply;
>      int error;
> @@ -6258,7 +6310,11 @@ get_stats_via_netlink(const struct netdev *netdev_, struct netdev_stats *stats)
>                          RTM_GETLINK, NLM_F_REQUEST);
>      ofpbuf_put_zeros(&request, sizeof(struct ifinfomsg));
>      nl_msg_put_string(&request, IFLA_IFNAME, netdev_get_name(netdev_));
> -    error = nl_transact(NETLINK_ROUTE, &request, &reply);
> +    if (is_tap_netdev(netdev_) && (netdev->netns != NULL)) {
> +        error = nl_transact_nopool(NETLINK_ROUTE, &request, &reply);
> +    } else {
> +        error = nl_transact(NETLINK_ROUTE, &request, &reply);
> +    }
>      ofpbuf_uninit(&request);
>      if (error) {
>          return error;
> diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c index 
> 47077e9..2377ca7 100644
> --- a/lib/netlink-socket.c
> +++ b/lib/netlink-socket.c
> @@ -1807,6 +1807,152 @@ nl_transact(int protocol, const struct ofpbuf *request,
>      return error;
>  }
>  
> +static int
> +nl_sock_create_nopool(int protocol, struct nl_sock **sockp) {
> +    struct nl_sock *sock;
> +#ifndef _WIN32
> +    struct sockaddr_nl local, remote; #endif
> +    socklen_t local_size;
> +    int rcvbuf;
> +    int retval = 0;
> +
> +    *sockp = NULL;
> +    sock = xmalloc(sizeof *sock);
> +
> +#ifdef _WIN32
> +    sock->overlapped.hEvent = NULL;
> +    sock->handle = CreateFile(OVS_DEVICE_NAME_USER,
> +                              GENERIC_READ | GENERIC_WRITE,
> +                              FILE_SHARE_READ | FILE_SHARE_WRITE,
> +                              NULL, OPEN_EXISTING,
> +                              FILE_FLAG_OVERLAPPED, NULL);
> +
> +    if (sock->handle == INVALID_HANDLE_VALUE) {
> +        VLOG_ERR("fcntl: %s", ovs_lasterror_to_string());
> +        goto error;
> +    }
> +
> +    memset(&sock->overlapped, 0, sizeof sock->overlapped);
> +    sock->overlapped.hEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
> +    if (sock->overlapped.hEvent == NULL) {
> +        VLOG_ERR("fcntl: %s", ovs_lasterror_to_string());
> +        goto error;
> +    }
> +    /* Initialize the type/ioctl to Generic */
> +    sock->read_ioctl = OVS_IOCTL_READ; #else
> +    sock->fd = socket(AF_NETLINK, SOCK_RAW, protocol);
> +    if (sock->fd < 0) {
> +        VLOG_ERR("fcntl: %s", ovs_strerror(errno));
> +        goto error;
> +    }
> +#endif
> +
> +    sock->protocol = protocol;
> +    sock->next_seq = 1;
> +
> +    rcvbuf = 1024 * 1024;
> +#ifdef _WIN32
> +    sock->rcvbuf = rcvbuf;
> +    retval = get_sock_pid_from_kernel(sock);
> +    if (retval != 0) {
> +        goto error;
> +    }
> +    retval = set_sock_property(sock);
> +    if (retval != 0) {
> +        goto error;
> +    }
> +#else
> +    if (setsockopt(sock->fd, SOL_SOCKET, SO_RCVBUFFORCE,
> +                   &rcvbuf, sizeof rcvbuf)) {
> +        /* Only root can use SO_RCVBUFFORCE.  Everyone else gets EPERM.
> +         * Warn only if the failure is therefore unexpected. */
> +        if (errno != EPERM) {
> +            VLOG_WARN_RL(&rl, "setting %d-byte socket receive buffer failed "
> +                         "(%s)", rcvbuf, ovs_strerror(errno));
> +        }
> +    }
> +
> +    retval = get_socket_rcvbuf(sock->fd);
> +    if (retval < 0) {
> +        retval = -retval;
> +        goto error;
> +    }
> +    sock->rcvbuf = retval;
> +    retval = 0;
> +
> +    /* Connect to kernel (pid 0) as remote address. */
> +    memset(&remote, 0, sizeof remote);
> +    remote.nl_family = AF_NETLINK;
> +    remote.nl_pid = 0;
> +    if (connect(sock->fd, (struct sockaddr *) &remote, sizeof remote) < 0) {
> +        VLOG_ERR("connect(0): %s", ovs_strerror(errno));
> +        goto error;
> +    }
> +
> +    /* Obtain pid assigned by kernel. */
> +    local_size = sizeof local;
> +    if (getsockname(sock->fd, (struct sockaddr *) &local, &local_size) < 0) {
> +        VLOG_ERR("getsockname: %s", ovs_strerror(errno));
> +        goto error;
> +    }
> +    if (local_size < sizeof local || local.nl_family != AF_NETLINK) {
> +        VLOG_ERR("getsockname returned bad Netlink name");
> +        retval = EINVAL;
> +        goto error;
> +    }
> +    sock->pid = local.nl_pid;
> +#endif
> +
> +    *sockp = sock;
> +    return 0;
> +
> +error:
> +    if (retval == 0) {
> +        retval = errno;
> +        if (retval == 0) {
> +            retval = EINVAL;
> +        }
> +    }
> +#ifdef _WIN32
> +    if (sock->overlapped.hEvent) {
> +        CloseHandle(sock->overlapped.hEvent);
> +    }
> +    if (sock->handle != INVALID_HANDLE_VALUE) {
> +        CloseHandle(sock->handle);
> +    }
> +#else
> +    if (sock->fd >= 0) {
> +        close(sock->fd);
> +    }
> +#endif
> +    free(sock);
> +    return retval;
> +}
> +
> +int
> +nl_transact_nopool(int protocol, const struct ofpbuf *request,
> +            struct ofpbuf **replyp)
> +{
> +    struct nl_sock *sock;
> +    int error;
> +
> +    error = nl_sock_create_nopool(protocol, &sock);
> +    if (error) {
> +        if (replyp) {
> +            *replyp = NULL;
> +        }
> +        return error;
> +    }
> +
> +    error = nl_sock_transact(sock, request, replyp);
> +    nl_sock_destroy(sock);
> +
> +    return error;
> +}
> +
>  /* Sends the 'request' member of the 'n' transactions in 'transactions' on a
>   * Netlink socket for the given 'protocol' (e.g. NETLINK_ROUTE or
>   * NETLINK_GENERIC), in order, and receives responses to all of them.  
> Fills in diff --git a/lib/netlink-socket.h b/lib/netlink-socket.h 
> index 7852ad0..08cbab8 100644
> --- a/lib/netlink-socket.h
> +++ b/lib/netlink-socket.h
> @@ -254,6 +254,8 @@ struct nl_transaction {  int nl_transact(int 
> protocol, const struct ofpbuf *request,
>                  struct ofpbuf **replyp);  void 
> nl_transact_multiple(int protocol, struct nl_transaction **, size_t 
> n);
> +int nl_transact_nopool(int protocol, const struct ofpbuf *request,
> +                       struct ofpbuf **replyp);
>  
>  /* Table dumping. */
>  #define NL_DUMP_BUFSIZE         4096
diff mbox series

Patch

diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 7da4fb5..8ed37ed 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -4282,6 +4282,43 @@  dpif_netlink_vport_transact(const struct dpif_netlink_vport *request,
     return error;
 }
 
+static int
+dpif_netlink_vport_transact_nopool(const struct dpif_netlink_vport *request,
+                                   struct dpif_netlink_vport *reply,
+                                   struct ofpbuf **bufp)
+{
+    struct ofpbuf *request_buf;
+    int error;
+
+    ovs_assert((reply != NULL) == (bufp != NULL));
+
+    error = dpif_netlink_init();
+    if (error) {
+        if (reply) {
+            *bufp = NULL;
+            dpif_netlink_vport_init(reply);
+        }
+        return error;
+    }
+
+    request_buf = ofpbuf_new(1024);
+    dpif_netlink_vport_to_ofpbuf(request, request_buf);
+    error = nl_transact_nopool(NETLINK_GENERIC, request_buf, bufp);
+    ofpbuf_delete(request_buf);
+
+    if (reply) {
+        if (!error) {
+            error = dpif_netlink_vport_from_ofpbuf(reply, *bufp);
+        }
+        if (error) {
+            dpif_netlink_vport_init(reply);
+            ofpbuf_delete(*bufp);
+            *bufp = NULL;
+        }
+    }
+    return error;
+}
+
 /* Obtains information about the kernel vport named 'name' and stores it into
  * '*reply' and '*bufp'.  The caller must free '*bufp' when the reply is no
  * longer needed ('reply' will contain pointers into '*bufp').  */
@@ -4298,6 +4335,20 @@  dpif_netlink_vport_get(const char *name, struct dpif_netlink_vport *reply,
     return dpif_netlink_vport_transact(&request, reply, bufp);
 }
 
+int
+dpif_netlink_vport_get_nopool(const char *name,
+                              struct dpif_netlink_vport *reply,
+                              struct ofpbuf **bufp)
+{
+    struct dpif_netlink_vport request;
+
+    dpif_netlink_vport_init(&request);
+    request.cmd = OVS_VPORT_CMD_GET;
+    request.name = name;
+
+    return dpif_netlink_vport_transact_nopool(&request, reply, bufp);
+}
+
 /* Parses the contents of 'buf', which contains a "struct ovs_header" followed
  * by Netlink attributes, into 'dp'.  Returns 0 if successful, otherwise a
  * positive errno value.
diff --git a/lib/dpif-netlink.h b/lib/dpif-netlink.h
index 24294bc..9372241 100644
--- a/lib/dpif-netlink.h
+++ b/lib/dpif-netlink.h
@@ -55,6 +55,9 @@  int dpif_netlink_vport_transact(const struct dpif_netlink_vport *request,
                                 struct ofpbuf **bufp);
 int dpif_netlink_vport_get(const char *name, struct dpif_netlink_vport *reply,
                            struct ofpbuf **bufp);
+int dpif_netlink_vport_get_nopool(const char *name,
+                                  struct dpif_netlink_vport *reply,
+                                  struct ofpbuf **bufp);
 
 bool dpif_netlink_is_internal_device(const char *name);
 
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index bb3aa9b..2a7d5ec 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -520,8 +520,16 @@  static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
  * changes in the device miimon status, so we can use atomic_count. */
 static atomic_count miimon_cnt = ATOMIC_COUNT_INIT(0);
 
+struct netns_knob {
+    int main_fd;
+    int netns_fd;
+    char nspath[128];
+};
+
 static int netdev_linux_parse_vnet_hdr(struct dp_packet *b);
 static void netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu);
+static int enter_netns(struct netns_knob *netns_knob, const char *netns);
+static void exit_netns(struct netns_knob *netns_knob);
 static int netdev_linux_do_ethtool(const char *name, struct ethtool_cmd *,
                                    int cmd, const char *cmd_name);
 static int get_flags(const struct netdev *, unsigned int *flags);
@@ -2158,11 +2166,18 @@  netdev_stats_from_ovs_vport_stats(struct netdev_stats *dst,
 static int
 get_stats_via_vport__(const struct netdev *netdev, struct netdev_stats *stats)
 {
+    struct netdev_linux *netdev_linux = netdev_linux_cast(netdev);
     struct dpif_netlink_vport reply;
     struct ofpbuf *buf;
     int error;
 
-    error = dpif_netlink_vport_get(netdev_get_name(netdev), &reply, &buf);
+    if (is_tap_netdev(netdev) && (netdev_linux->netns != NULL)) {
+        error = dpif_netlink_vport_get_nopool(netdev_get_name(netdev),
+                                              &reply, &buf);
+    } else {
+        error = dpif_netlink_vport_get(netdev_get_name(netdev),
+                                              &reply, &buf);
+    }
     if (error) {
         return error;
     } else if (!reply.stats) {
@@ -2237,6 +2252,33 @@  netdev_linux_get_stats(const struct netdev *netdev_,
     return error;
 }
 
+static int
+enter_netns(struct netns_knob *netns_knob, const char *netns)
+{
+    sprintf(netns_knob->nspath, "/proc/%d/ns/net", getpid());
+    netns_knob->main_fd = open(netns_knob->nspath, O_RDONLY);
+    if (netns_knob->main_fd < 0) {
+        return errno;
+    }
+
+    sprintf(netns_knob->nspath, "/var/run/netns/%s", netns);
+    netns_knob->netns_fd = open(netns_knob->nspath, O_RDONLY);
+    if (netns_knob->netns_fd < 0) {
+        close(netns_knob->main_fd);
+        return errno;
+    }
+    setns(netns_knob->netns_fd, 0);
+    return 0;
+}
+
+static void
+exit_netns(struct netns_knob *netns_knob)
+{
+    setns(netns_knob->main_fd, 0);
+    close(netns_knob->netns_fd);
+    close(netns_knob->main_fd);
+}
+
 /* Retrieves current device stats for 'netdev-tap' netdev or
  * netdev-internal. */
 static int
@@ -2245,6 +2287,11 @@  netdev_tap_get_stats(const struct netdev *netdev_, struct netdev_stats *stats)
     struct netdev_linux *netdev = netdev_linux_cast(netdev_);
     struct netdev_stats dev_stats;
     int error;
+    struct netns_knob netns_knob;
+
+    if (is_tap_netdev(netdev_) && (netdev->netns != NULL)) {
+        error = enter_netns(&netns_knob, netdev->netns);
+    }
 
     ovs_mutex_lock(&netdev->mutex);
     get_stats_via_vport(netdev_, stats);
@@ -2298,6 +2345,10 @@  netdev_tap_get_stats(const struct netdev *netdev_, struct netdev_stats *stats)
     stats->rx_dropped += netdev->rx_dropped;
     ovs_mutex_unlock(&netdev->mutex);
 
+    if (is_tap_netdev(netdev_) && (netdev->netns != NULL)) {
+        exit_netns(&netns_knob);
+    }
+
     return error;
 }
 
@@ -6245,6 +6296,7 @@  netdev_stats_from_rtnl_link_stats64(struct netdev_stats *dst,
 int
 get_stats_via_netlink(const struct netdev *netdev_, struct netdev_stats *stats)
 {
+    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
     struct ofpbuf request;
     struct ofpbuf *reply;
     int error;
@@ -6258,7 +6310,11 @@  get_stats_via_netlink(const struct netdev *netdev_, struct netdev_stats *stats)
                         RTM_GETLINK, NLM_F_REQUEST);
     ofpbuf_put_zeros(&request, sizeof(struct ifinfomsg));
     nl_msg_put_string(&request, IFLA_IFNAME, netdev_get_name(netdev_));
-    error = nl_transact(NETLINK_ROUTE, &request, &reply);
+    if (is_tap_netdev(netdev_) && (netdev->netns != NULL)) {
+        error = nl_transact_nopool(NETLINK_ROUTE, &request, &reply);
+    } else {
+        error = nl_transact(NETLINK_ROUTE, &request, &reply);
+    }
     ofpbuf_uninit(&request);
     if (error) {
         return error;
diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c
index 47077e9..2377ca7 100644
--- a/lib/netlink-socket.c
+++ b/lib/netlink-socket.c
@@ -1807,6 +1807,152 @@  nl_transact(int protocol, const struct ofpbuf *request,
     return error;
 }
 
+static int
+nl_sock_create_nopool(int protocol, struct nl_sock **sockp)
+{
+    struct nl_sock *sock;
+#ifndef _WIN32
+    struct sockaddr_nl local, remote;
+#endif
+    socklen_t local_size;
+    int rcvbuf;
+    int retval = 0;
+
+    *sockp = NULL;
+    sock = xmalloc(sizeof *sock);
+
+#ifdef _WIN32
+    sock->overlapped.hEvent = NULL;
+    sock->handle = CreateFile(OVS_DEVICE_NAME_USER,
+                              GENERIC_READ | GENERIC_WRITE,
+                              FILE_SHARE_READ | FILE_SHARE_WRITE,
+                              NULL, OPEN_EXISTING,
+                              FILE_FLAG_OVERLAPPED, NULL);
+
+    if (sock->handle == INVALID_HANDLE_VALUE) {
+        VLOG_ERR("fcntl: %s", ovs_lasterror_to_string());
+        goto error;
+    }
+
+    memset(&sock->overlapped, 0, sizeof sock->overlapped);
+    sock->overlapped.hEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
+    if (sock->overlapped.hEvent == NULL) {
+        VLOG_ERR("fcntl: %s", ovs_lasterror_to_string());
+        goto error;
+    }
+    /* Initialize the type/ioctl to Generic */
+    sock->read_ioctl = OVS_IOCTL_READ;
+#else
+    sock->fd = socket(AF_NETLINK, SOCK_RAW, protocol);
+    if (sock->fd < 0) {
+        VLOG_ERR("fcntl: %s", ovs_strerror(errno));
+        goto error;
+    }
+#endif
+
+    sock->protocol = protocol;
+    sock->next_seq = 1;
+
+    rcvbuf = 1024 * 1024;
+#ifdef _WIN32
+    sock->rcvbuf = rcvbuf;
+    retval = get_sock_pid_from_kernel(sock);
+    if (retval != 0) {
+        goto error;
+    }
+    retval = set_sock_property(sock);
+    if (retval != 0) {
+        goto error;
+    }
+#else
+    if (setsockopt(sock->fd, SOL_SOCKET, SO_RCVBUFFORCE,
+                   &rcvbuf, sizeof rcvbuf)) {
+        /* Only root can use SO_RCVBUFFORCE.  Everyone else gets EPERM.
+         * Warn only if the failure is therefore unexpected. */
+        if (errno != EPERM) {
+            VLOG_WARN_RL(&rl, "setting %d-byte socket receive buffer failed "
+                         "(%s)", rcvbuf, ovs_strerror(errno));
+        }
+    }
+
+    retval = get_socket_rcvbuf(sock->fd);
+    if (retval < 0) {
+        retval = -retval;
+        goto error;
+    }
+    sock->rcvbuf = retval;
+    retval = 0;
+
+    /* Connect to kernel (pid 0) as remote address. */
+    memset(&remote, 0, sizeof remote);
+    remote.nl_family = AF_NETLINK;
+    remote.nl_pid = 0;
+    if (connect(sock->fd, (struct sockaddr *) &remote, sizeof remote) < 0) {
+        VLOG_ERR("connect(0): %s", ovs_strerror(errno));
+        goto error;
+    }
+
+    /* Obtain pid assigned by kernel. */
+    local_size = sizeof local;
+    if (getsockname(sock->fd, (struct sockaddr *) &local, &local_size) < 0) {
+        VLOG_ERR("getsockname: %s", ovs_strerror(errno));
+        goto error;
+    }
+    if (local_size < sizeof local || local.nl_family != AF_NETLINK) {
+        VLOG_ERR("getsockname returned bad Netlink name");
+        retval = EINVAL;
+        goto error;
+    }
+    sock->pid = local.nl_pid;
+#endif
+
+    *sockp = sock;
+    return 0;
+
+error:
+    if (retval == 0) {
+        retval = errno;
+        if (retval == 0) {
+            retval = EINVAL;
+        }
+    }
+#ifdef _WIN32
+    if (sock->overlapped.hEvent) {
+        CloseHandle(sock->overlapped.hEvent);
+    }
+    if (sock->handle != INVALID_HANDLE_VALUE) {
+        CloseHandle(sock->handle);
+    }
+#else
+    if (sock->fd >= 0) {
+        close(sock->fd);
+    }
+#endif
+    free(sock);
+    return retval;
+}
+
+int
+nl_transact_nopool(int protocol, const struct ofpbuf *request,
+            struct ofpbuf **replyp)
+{
+    struct nl_sock *sock;
+    int error;
+
+    error = nl_sock_create_nopool(protocol, &sock);
+    if (error) {
+        if (replyp) {
+            *replyp = NULL;
+        }
+        return error;
+    }
+
+    error = nl_sock_transact(sock, request, replyp);
+    nl_sock_destroy(sock);
+
+    return error;
+}
+
 /* Sends the 'request' member of the 'n' transactions in 'transactions' on a
  * Netlink socket for the given 'protocol' (e.g. NETLINK_ROUTE or
  * NETLINK_GENERIC), in order, and receives responses to all of them.  Fills in
diff --git a/lib/netlink-socket.h b/lib/netlink-socket.h
index 7852ad0..08cbab8 100644
--- a/lib/netlink-socket.h
+++ b/lib/netlink-socket.h
@@ -254,6 +254,8 @@  struct nl_transaction {
 int nl_transact(int protocol, const struct ofpbuf *request,
                 struct ofpbuf **replyp);
 void nl_transact_multiple(int protocol, struct nl_transaction **, size_t n);
+int nl_transact_nopool(int protocol, const struct ofpbuf *request,
+                       struct ofpbuf **replyp);
 
 /* Table dumping. */
 #define NL_DUMP_BUFSIZE         4096