diff mbox series

[ovs-dev,v6,2/2] dpif-netlink: distribute polling to discreet handlers

Message ID 20200907083755.11549-3-mark.d.gray@redhat.com
State Changes Requested
Headers show
Series dpif-netlink: distribute polling to discreet handlers | expand

Commit Message

Mark Gray Sept. 7, 2020, 8:37 a.m. UTC
From: Aaron Conole <aconole@redhat.com>

Currently, the channel handlers are polled globally.  On some
systems, this causes a thundering herd issue where multiple
handler threads become active, only to do no work and immediately
sleep.

The approach here is to push the netlink socket channels to discreet
handler threads to process, rather than polling on every thread.
This will eliminate the need to wake multiple threads.

To check:

  ip netns add left
  ip netns add right
  ip link add center-left type veth peer name left0 netns left
  ip link add center-right type veth peer name right0 netns right
  ip link set center-left up
  ip link set center-right up
  ip -n left ip link set left0 up
  ip -n left ip addr add 172.31.110.10/24 dev left0
  ip -n right ip link set right0 up
  ip -n right ip addr add 172.31.110.11/24 dev right0

  ovs-vsctl add-br br0
  ovs-vsctl add-port br0 center-right
  ovs-vsctl add-port br0 center-left

  # in one terminal
  perf record -e sched:sched_wakeup,irq:softirq_entry -ag

  # in a separate terminal
  ip netns exec left arping -I left0 -c 1 172.31.110.11

  # in the perf terminal after exiting
  perf script

Look for the number of 'handler' threads which were made active.

Suggested-by: Ben Pfaff <blp@ovn.org>
Co-authored-by: Mark Gray <mark.d.gray@redhat.com>
Reported-by: David Ahern <dsahern@gmail.com>
Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2019-December/365857.html
Cc: Matteo Croce <technoboy85@gmail.com>
Cc: Flavio Leitner <fbl@sysclose.org>
Fixes: 69c51582f ("dpif-netlink: don't allocate per thread netlink sockets")
Signed-off-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Mark Gray <mark.d.gray@redhat.com>
---
 lib/dpif-netlink.c | 320 +++++++++++++++++++++++++--------------------
 1 file changed, 178 insertions(+), 142 deletions(-)

Comments

Flavio Leitner Sept. 7, 2020, 10:31 p.m. UTC | #1
Hi Mark,

Thanks again for the patch.

I think the id-pool patch could be merged to this one since we
usually merge the API change with its new user in the same patch.

See my other comments inline


On Mon, Sep 07, 2020 at 09:37:55AM +0100, Mark Gray wrote:
> From: Aaron Conole <aconole@redhat.com>
> 
> Currently, the channel handlers are polled globally.  On some
> systems, this causes a thundering herd issue where multiple
> handler threads become active, only to do no work and immediately
> sleep.
> 
> The approach here is to push the netlink socket channels to discreet
> handler threads to process, rather than polling on every thread.
> This will eliminate the need to wake multiple threads.
> 
> To check:
> 
>   ip netns add left
>   ip netns add right
>   ip link add center-left type veth peer name left0 netns left
>   ip link add center-right type veth peer name right0 netns right
>   ip link set center-left up
>   ip link set center-right up
>   ip -n left ip link set left0 up
>   ip -n left ip addr add 172.31.110.10/24 dev left0
>   ip -n right ip link set right0 up
>   ip -n right ip addr add 172.31.110.11/24 dev right0
> 
>   ovs-vsctl add-br br0
>   ovs-vsctl add-port br0 center-right
>   ovs-vsctl add-port br0 center-left
> 
>   # in one terminal
>   perf record -e sched:sched_wakeup,irq:softirq_entry -ag
> 
>   # in a separate terminal
>   ip netns exec left arping -I left0 -c 1 172.31.110.11
> 
>   # in the perf terminal after exiting
>   perf script
> 
> Look for the number of 'handler' threads which were made active.
> 
> Suggested-by: Ben Pfaff <blp@ovn.org>
> Co-authored-by: Mark Gray <mark.d.gray@redhat.com>
> Reported-by: David Ahern <dsahern@gmail.com>
> Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2019-December/365857.html
> Cc: Matteo Croce <technoboy85@gmail.com>
> Cc: Flavio Leitner <fbl@sysclose.org>
> Fixes: 69c51582f ("dpif-netlink: don't allocate per thread netlink sockets")
> Signed-off-by: Aaron Conole <aconole@redhat.com>
> Signed-off-by: Mark Gray <mark.d.gray@redhat.com>
> ---
>  lib/dpif-netlink.c | 320 +++++++++++++++++++++++++--------------------
>  1 file changed, 178 insertions(+), 142 deletions(-)
> 
> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
> index 7da4fb54d..273083fcc 100644
> --- a/lib/dpif-netlink.c
> +++ b/lib/dpif-netlink.c
> @@ -37,6 +37,7 @@
>  #include "dpif-provider.h"
>  #include "fat-rwlock.h"
>  #include "flow.h"
> +#include "id-pool.h"
>  #include "netdev-linux.h"
>  #include "netdev-offload.h"
>  #include "netdev-provider.h"
> @@ -160,6 +161,9 @@ static void dpif_netlink_flow_to_dpif_flow(struct dpif_flow *,
>  
>  /* One of the dpif channels between the kernel and userspace. */
>  struct dpif_channel {
> +    struct hmap_node hmap_node;
> +    struct dpif_handler *handler;
> +    uint32_t port_idx;
>      struct nl_sock *sock;       /* Netlink socket. */
>      long long int last_poll;    /* Last time this channel was polled. */
>  };
> @@ -183,6 +187,8 @@ struct dpif_handler {
>      int n_events;                 /* Num events returned by epoll_wait(). */
>      int event_offset;             /* Offset into 'epoll_events'. */
>  
> +    struct hmap channels;         /* map of channels for each port. */
> +
>  #ifdef _WIN32
>      /* Pool of sockets. */
>      struct dpif_windows_vport_sock *vport_sock_pool;
> @@ -201,9 +207,8 @@ struct dpif_netlink {
>      struct fat_rwlock upcall_lock;
>      struct dpif_handler *handlers;
>      uint32_t n_handlers;           /* Num of upcall handlers. */
> -    struct dpif_channel *channels; /* Array of channels for each port. */
> -    int uc_array_size;             /* Size of 'handler->channels' and */
> -                                   /* 'handler->epoll_events'. */
> +
> +    struct id_pool *port_ids;      /* Set of added port ids */
>  
>      /* Change notification. */
>      struct nl_sock *port_notifier; /* vport multicast group subscriber. */
> @@ -287,6 +292,27 @@ close_nl_sock(struct nl_sock *sock)
>  #endif
>  }
>  
> +static size_t
> +dpif_handler_channels_count(const struct dpif_handler *handler)
> +{
> +    return hmap_count(&handler->channels);
> +}
> +
> +static struct dpif_channel *
> +dpif_handler_channel_find(struct dpif_handler *handler, uint32_t idx)
> +{
> +    struct dpif_channel *channel;
> +
> +    HMAP_FOR_EACH_WITH_HASH (channel, hmap_node, hash_int(idx, 0),
> +                             &handler->channels) {
> +        if (channel->port_idx == idx) {
> +            return channel;
> +        }
> +    }
> +
> +    return NULL;
> +}
> +
>  static struct dpif_netlink *
>  dpif_netlink_cast(const struct dpif *dpif)
>  {
> @@ -385,6 +411,8 @@ open_dpif(const struct dpif_netlink_dp *dp, struct dpif **dpifp)
>  
>      dpif->dp_ifindex = dp->dp_ifindex;
>      dpif->user_features = dp->user_features;
> +    dpif->port_ids = id_pool_create(0, MAX_PORTS);
> +

Extra line not needed.

>      *dpifp = &dpif->dpif;
>  
>      return 0;
> @@ -452,161 +480,140 @@ static bool
>  vport_get_pid(struct dpif_netlink *dpif, uint32_t port_idx,
>                uint32_t *upcall_pid)
>  {
> -    /* Since the nl_sock can only be assigned in either all
> -     * or none "dpif" channels, the following check
> -     * would suffice. */
> -    if (!dpif->channels[port_idx].sock) {
> +    size_t i;

New empty line needed.

> +    ovs_assert(!WINDOWS || dpif->n_handlers <= 1);
> +
> +    if (!id_pool_has_id(dpif->port_ids, port_idx)) {
>          return false;
>      }
> -    ovs_assert(!WINDOWS || dpif->n_handlers <= 1);
>  
> -    *upcall_pid = nl_sock_pid(dpif->channels[port_idx].sock);
> +    /* The 'port_idx' should only be valid for a single handler. */
> +    for (i = 0; i < dpif->n_handlers; i++) {
> +        struct dpif_channel *channel =
> +            dpif_handler_channel_find(&dpif->handlers[i], port_idx);

Perhaps:
       for (i = 0; i < dpif->n_handlers; i++) {
           struct dpif_channel *channel;
 
           channel = dpif_handler_channel_find(&dpif->handlers[i], port_idx);
       [...]

>  
> -    return true;
> +        if (channel) {
> +            *upcall_pid = nl_sock_pid(channel->sock);
> +            return true;
> +        }
> +    }
> +
> +    return false;
>  }
>  
>  static int
>  vport_add_channel(struct dpif_netlink *dpif, odp_port_t port_no,
>                    struct nl_sock *sock)
>  {
> -    struct epoll_event event;
>      uint32_t port_idx = odp_to_u32(port_no);
> +    struct dpif_channel *channel;
> +    struct dpif_handler *handler;
> +    struct epoll_event event;
>      size_t i;
> -    int error;
>  
> -    if (dpif->handlers == NULL) {
> +    if (dpif->handlers == NULL ||
> +        id_pool_has_id(dpif->port_ids, port_idx)) {
>          close_nl_sock(sock);
> -        return 0;
> +        return EINVAL;
>      }
>  
> -    /* We assume that the datapath densely chooses port numbers, which can
> -     * therefore be used as an index into 'channels' and 'epoll_events' of
> -     * 'dpif'. */
> -    if (port_idx >= dpif->uc_array_size) {
> -        uint32_t new_size = port_idx + 1;
> -
> -        if (new_size > MAX_PORTS) {
> -            VLOG_WARN_RL(&error_rl, "%s: datapath port %"PRIu32" too big",
> -                         dpif_name(&dpif->dpif), port_no);
> -            return EFBIG;
> -        }
> +    if (port_idx >= MAX_PORTS) {
> +        VLOG_WARN_RL(&error_rl, "%s: datapath port %"PRIu32" too big",
> +                     dpif_name(&dpif->dpif), port_no);
> +        return EFBIG;
> +    }
>  
> -        dpif->channels = xrealloc(dpif->channels,
> -                                  new_size * sizeof *dpif->channels);
> +    handler = &dpif->handlers[0];
>  
> -        for (i = dpif->uc_array_size; i < new_size; i++) {
> -            dpif->channels[i].sock = NULL;
> +    for (i = 0; i < dpif->n_handlers; i++) {
> +        if (dpif_handler_channels_count(&dpif->handlers[i]) <
> +            dpif_handler_channels_count(handler)) {
> +            handler = &dpif->handlers[i];
>          }
> +    }
>  
> -        for (i = 0; i < dpif->n_handlers; i++) {
> -            struct dpif_handler *handler = &dpif->handlers[i];
> -
> -            handler->epoll_events = xrealloc(handler->epoll_events,
> -                new_size * sizeof *handler->epoll_events);
> +    channel = xmalloc(sizeof *channel);
> +    channel->port_idx = port_idx;
> +    channel->sock = sock;
> +    channel->last_poll = LLONG_MIN;
> +    channel->handler = handler;
> +    hmap_insert(&handler->channels, &channel->hmap_node,
> +                hash_int(port_idx, 0));
>  
> -        }
> -        dpif->uc_array_size = new_size;
> -    }
> +    handler->epoll_events = xrealloc(handler->epoll_events,
> +                                     dpif_handler_channels_count(handler) *
> +                                     sizeof *handler->epoll_events);
>  
>      memset(&event, 0, sizeof event);
>      event.events = EPOLLIN | EPOLLEXCLUSIVE;
>      event.data.u32 = port_idx;
>  
> -    for (i = 0; i < dpif->n_handlers; i++) {
> -        struct dpif_handler *handler = &dpif->handlers[i];
> -
> -#ifndef _WIN32
> -        if (epoll_ctl(handler->epoll_fd, EPOLL_CTL_ADD, nl_sock_fd(sock),
> -                      &event) < 0) {
> -            error = errno;
> -            goto error;
> -        }
> -#endif
> -    }
> -    dpif->channels[port_idx].sock = sock;
> -    dpif->channels[port_idx].last_poll = LLONG_MIN;
> -
> -    return 0;
> -
> -error:
>  #ifndef _WIN32
> -    while (i--) {
> -        epoll_ctl(dpif->handlers[i].epoll_fd, EPOLL_CTL_DEL,
> -                  nl_sock_fd(sock), NULL);
> +    if (epoll_ctl(handler->epoll_fd, EPOLL_CTL_ADD, nl_sock_fd(sock),
> +                  &event) < 0) {
> +        hmap_remove(&handler->channels, &channel->hmap_node);
> +        free(channel);
> +        id_pool_free_id(dpif->port_ids, port_idx);
> +        return errno;
>      }
>  #endif
> -    dpif->channels[port_idx].sock = NULL;
>  
> -    return error;
> +    id_pool_add(dpif->port_ids, port_idx);
> +    return 0;
>  }
>  
>  static void
> -vport_del_channels(struct dpif_netlink *dpif, odp_port_t port_no)
> +vport_del_channel(struct dpif_netlink *dpif,
> +                  struct dpif_channel *channel)
>  {
> -    uint32_t port_idx = odp_to_u32(port_no);
> -    size_t i;
> -
> -    if (!dpif->handlers || port_idx >= dpif->uc_array_size
> -        || !dpif->channels[port_idx].sock) {
> -        return;
> -    }
> -
> -    for (i = 0; i < dpif->n_handlers; i++) {
> -        struct dpif_handler *handler = &dpif->handlers[i];
> +    if (channel) {
>  #ifndef _WIN32
> -        epoll_ctl(handler->epoll_fd, EPOLL_CTL_DEL,
> -                  nl_sock_fd(dpif->channels[port_idx].sock), NULL);
> +        epoll_ctl(channel->handler->epoll_fd, EPOLL_CTL_DEL,
> +                  nl_sock_fd(channel->sock), NULL);
> +        nl_sock_destroy(channel->sock);
>  #endif
> -        handler->event_offset = handler->n_events = 0;
> +        hmap_remove(&channel->handler->channels, &channel->hmap_node);
> +        id_pool_free_id(dpif->port_ids, channel->port_idx);
> +        channel->handler->event_offset = channel->handler->n_events = 0;
> +        free(channel);
>      }
> -#ifndef _WIN32
> -    nl_sock_destroy(dpif->channels[port_idx].sock);
> -#endif
> -    dpif->channels[port_idx].sock = NULL;
>  }


The above function could be simplified since almost all callers
check if channel is not null. The only one that does not check
should do it to break earlier. It's noted below.

Also, the double dereferencing can be removed.


static void
vport_del_channel(struct dpif_netlink *dpif,
                  struct dpif_channel *channel)
    OVS_REQ_WRLOCK(dpif->upcall_lock)
{
    struct dpif_handler *handler = channel->handler;

#ifndef _WIN32
    epoll_ctl(handler->epoll_fd, EPOLL_CTL_DEL, nl_sock_fd(channel->sock),
              NULL);
    nl_sock_destroy(channel->sock);
#endif
    hmap_remove(&handler->channels, &channel->hmap_node);
    id_pool_free_id(dpif->port_ids, channel->port_idx);
    handler->event_offset = handler->n_events = 0;
    free(channel);
}



>  
>  static void
>  destroy_all_channels(struct dpif_netlink *dpif)
>      OVS_REQ_WRLOCK(dpif->upcall_lock)
>  {
> -    unsigned int i;
> +    size_t i;
>  
>      if (!dpif->handlers) {
>          return;
>      }
>  
> -    for (i = 0; i < dpif->uc_array_size; i++ ) {
> -        struct dpif_netlink_vport vport_request;
> -        uint32_t upcall_pids = 0;
> -
> -        if (!dpif->channels[i].sock) {
> -            continue;
> -        }
> -
> -        /* Turn off upcalls. */
> -        dpif_netlink_vport_init(&vport_request);
> -        vport_request.cmd = OVS_VPORT_CMD_SET;
> -        vport_request.dp_ifindex = dpif->dp_ifindex;
> -        vport_request.port_no = u32_to_odp(i);
> -        vport_request.n_upcall_pids = 1;
> -        vport_request.upcall_pids = &upcall_pids;
> -        dpif_netlink_vport_transact(&vport_request, NULL, NULL);
> -
> -        vport_del_channels(dpif, u32_to_odp(i));
> -    }
> -
>      for (i = 0; i < dpif->n_handlers; i++) {
>          struct dpif_handler *handler = &dpif->handlers[i];
> +        struct dpif_channel *channel, *next;
> +
> +        HMAP_FOR_EACH_SAFE (channel, next, hmap_node, &handler->channels) {
> +            struct dpif_netlink_vport vport_request;
> +            uint32_t upcall_pids = 0;
> +            /* Turn off upcalls. */
> +            dpif_netlink_vport_init(&vport_request);
> +            vport_request.cmd = OVS_VPORT_CMD_SET;
> +            vport_request.dp_ifindex = dpif->dp_ifindex;
> +            vport_request.port_no = u32_to_odp(channel->port_idx);
> +            vport_request.n_upcall_pids = 1;
> +            vport_request.upcall_pids = &upcall_pids;
> +            dpif_netlink_vport_transact(&vport_request, NULL, NULL);
> +

Extra line not needed.

> +            vport_del_channel(dpif, channel);
> +        }
>  
>          dpif_netlink_handler_uninit(handler);
>          free(handler->epoll_events);
>      }
> -    free(dpif->channels);
> -    free(dpif->handlers);
^^^^^^^^^^^^^^^^^^^^^^^^^^^^

I think it is leaking dpif->handlers, correct?


> +
>      dpif->handlers = NULL;
> -    dpif->channels = NULL;
>      dpif->n_handlers = 0;
> -    dpif->uc_array_size = 0;
>  }
>  
>  static void
> @@ -618,9 +625,11 @@ dpif_netlink_close(struct dpif *dpif_)
>  
>      fat_rwlock_wrlock(&dpif->upcall_lock);
>      destroy_all_channels(dpif);
> +    id_pool_destroy(dpif->port_ids);
>      fat_rwlock_unlock(&dpif->upcall_lock);
>  
>      fat_rwlock_destroy(&dpif->upcall_lock);
> +
>      free(dpif);
>  }
>  
> @@ -981,8 +990,10 @@ dpif_netlink_port_del__(struct dpif_netlink *dpif, odp_port_t port_no)
>      OVS_REQ_WRLOCK(dpif->upcall_lock)
>  {
>      struct dpif_netlink_vport vport;
> +    struct dpif_channel *channel = NULL;
>      struct dpif_port dpif_port;
>      int error;
> +    size_t i;
>  
>      error = dpif_netlink_port_query__(dpif, port_no, NULL, &dpif_port);
>      if (error) {
> @@ -1003,7 +1014,18 @@ dpif_netlink_port_del__(struct dpif_netlink *dpif, odp_port_t port_no)
>  #endif
>      error = dpif_netlink_vport_transact(&vport, NULL, NULL);
>  
> -    vport_del_channels(dpif, port_no);
> +    for (i = 0; i < dpif->n_handlers; i++) {
> +        channel = dpif_handler_channel_find(&dpif->handlers[i],
> +                                            odp_to_u32(port_no));
> +        if (channel) {
> +            vport_del_channel(dpif, channel);
> +            break;
> +        }
> +    }
> +
> +    if (!channel) {
> +        return EINVAL;
> +    }
>  
>      if (!error && !ovs_tunnels_out_of_tree) {
>          error = dpif_netlink_rtnl_port_destroy(dpif_port.name, dpif_port.type);
> @@ -1084,23 +1106,39 @@ dpif_netlink_port_get_pid__(const struct dpif_netlink *dpif,
>                              odp_port_t port_no)
>      OVS_REQ_RDLOCK(dpif->upcall_lock)
>  {
> +    struct dpif_channel *min_channel = NULL, *found_channel = NULL;
>      uint32_t port_idx = odp_to_u32(port_no);
>      uint32_t pid = 0;
> +    size_t i;
> +
> +    /* The ODPP_NONE "reserved" port number uses the "ovs-system"'s
> +     * channel, since it is not heavily loaded. */
> +    uint32_t idx = port_no == ODPP_NONE ? 0 : port_idx;
>  
> -    if (dpif->handlers && dpif->uc_array_size > 0) {
> -        /* The ODPP_NONE "reserved" port number uses the "ovs-system"'s
> -         * channel, since it is not heavily loaded. */
> -        uint32_t idx = port_idx >= dpif->uc_array_size ? 0 : port_idx;
> -
> -        /* Needs to check in case the socket pointer is changed in between
> -         * the holding of upcall_lock.  A known case happens when the main
> -         * thread deletes the vport while the handler thread is handling
> -         * the upcall from that port. */
> -        if (dpif->channels[idx].sock) {
> -            pid = nl_sock_pid(dpif->channels[idx].sock);
> +    if (!id_pool_has_id(dpif->port_ids, port_idx)) {
> +        return 0;
> +    }
> +
> +    if (dpif->handlers) {
> +        for (i = 0; i < dpif->n_handlers; i++) {
> +            if (dpif_handler_channel_find(&dpif->handlers[i], idx)) {
> +                found_channel = dpif_handler_channel_find(&dpif->handlers[i],
> +                                                          idx);
> +                break;
> +            }
> +
> +            if (dpif_handler_channel_find(&dpif->handlers[i], 0)) {
> +                min_channel = dpif_handler_channel_find(&dpif->handlers[i], 0);
> +            }
>          }
>      }
>  
> +    if (found_channel) {
> +        pid = nl_sock_pid(found_channel->sock);
> +    } else if (min_channel) {
> +        pid = nl_sock_pid(min_channel->sock);
> +    }
> +
>      return pid;
>  }
>  
> @@ -2342,12 +2380,14 @@ dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops,
>  static void
>  dpif_netlink_handler_uninit(struct dpif_handler *handler)
>  {
> +    hmap_destroy(&handler->channels);
>      vport_delete_sock_pool(handler);
>  }
>  
>  static int
>  dpif_netlink_handler_init(struct dpif_handler *handler)
>  {
> +    hmap_init(&handler->channels);
>      return vport_create_sock_pool(handler);
>  }
>  #else
> @@ -2355,6 +2395,7 @@ dpif_netlink_handler_init(struct dpif_handler *handler)
>  static int
>  dpif_netlink_handler_init(struct dpif_handler *handler)
>  {
> +    hmap_init(&handler->channels);
>      handler->epoll_fd = epoll_create(10);
>      return handler->epoll_fd < 0 ? errno : 0;
>  }
> @@ -2362,6 +2403,7 @@ dpif_netlink_handler_init(struct dpif_handler *handler)
>  static void
>  dpif_netlink_handler_uninit(struct dpif_handler *handler)
>  {
> +    hmap_destroy(&handler->channels);
>      close(handler->epoll_fd);
>  }
>  #endif
> @@ -2374,12 +2416,11 @@ static int
>  dpif_netlink_refresh_channels(struct dpif_netlink *dpif, uint32_t n_handlers)
>      OVS_REQ_WRLOCK(dpif->upcall_lock)
>  {
> -    unsigned long int *keep_channels;
>      struct dpif_netlink_vport vport;
> -    size_t keep_channels_nbits;
>      struct nl_dump dump;
>      uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
>      struct ofpbuf buf;
> +    struct dpif_channel *channel;
>      int retval = 0;
>      size_t i;
>  
> @@ -2412,13 +2453,9 @@ dpif_netlink_refresh_channels(struct dpif_netlink *dpif, uint32_t n_handlers)
>  
>      for (i = 0; i < n_handlers; i++) {
>          struct dpif_handler *handler = &dpif->handlers[i];
> -
>          handler->event_offset = handler->n_events = 0;
>      }
>  
> -    keep_channels_nbits = dpif->uc_array_size;
> -    keep_channels = bitmap_allocate(keep_channels_nbits);
> -
>      ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
>      dpif_netlink_port_dump_start__(dpif, &dump);
>      while (!dpif_netlink_port_dump_next__(dpif, &dump, &vport, &buf)) {
> @@ -2426,8 +2463,7 @@ dpif_netlink_refresh_channels(struct dpif_netlink *dpif, uint32_t n_handlers)
>          uint32_t upcall_pid;
>          int error;
>  
> -        if (port_no >= dpif->uc_array_size
> -            || !vport_get_pid(dpif, port_no, &upcall_pid)) {
> +        if (!vport_get_pid(dpif, port_no, &upcall_pid)) {
>              struct nl_sock *sock;
>              error = create_nl_sock(dpif, &sock);
>  
> @@ -2475,25 +2511,17 @@ dpif_netlink_refresh_channels(struct dpif_netlink *dpif, uint32_t n_handlers)
>              }
>          }
>  
> -        if (port_no < keep_channels_nbits) {
> -            bitmap_set1(keep_channels, port_no);
> -        }
>          continue;
>  
>      error:
> -        vport_del_channels(dpif, vport.port_no);
> +        for (i = 0; i < n_handlers; i++) {
> +            channel = dpif_handler_channel_find(&dpif->handlers[i], port_no);

Here is the place where it doesn't check if channel is NULL.

> +            vport_del_channel(dpif, channel);
> +        }

It could do instead:

        for (i = 0; i < n_handlers; i++) {
            channel = dpif_handler_channel_find(&dpif->handlers[i], port_no);
            if (channel) {
                vport_del_channel(dpif, channel);
                break;
            }
        }

which is similar in dpif_netlink_port_del__() and perhaps could
become a function.

What do you think?

fbl


>      }
>      nl_dump_done(&dump);
>      ofpbuf_uninit(&buf);
>  
> -    /* Discard any saved channels that we didn't reuse. */
> -    for (i = 0; i < keep_channels_nbits; i++) {
> -        if (!bitmap_is_set(keep_channels, i)) {
> -            vport_del_channels(dpif, u32_to_odp(i));
> -        }
> -    }
> -    free(keep_channels);
> -
>      return retval;
>  }
>  
> @@ -2714,6 +2742,8 @@ dpif_netlink_recv__(struct dpif_netlink *dpif, uint32_t handler_id,
>      OVS_REQ_RDLOCK(dpif->upcall_lock)
>  {
>      struct dpif_handler *handler;
> +    size_t n_channels;
> +
>      int read_tries = 0;
>  
>      if (!dpif->handlers || handler_id >= dpif->n_handlers) {
> @@ -2721,14 +2751,15 @@ dpif_netlink_recv__(struct dpif_netlink *dpif, uint32_t handler_id,
>      }
>  
>      handler = &dpif->handlers[handler_id];
> -    if (handler->event_offset >= handler->n_events) {
> +    n_channels = dpif_handler_channels_count(handler);
> +    if (handler->event_offset >= handler->n_events && n_channels) {
>          int retval;
>  
>          handler->event_offset = handler->n_events = 0;
>  
>          do {
>              retval = epoll_wait(handler->epoll_fd, handler->epoll_events,
> -                                dpif->uc_array_size, 0);
> +                                n_channels, 0);
>          } while (retval < 0 && errno == EINTR);
>  
>          if (retval < 0) {
> @@ -2741,7 +2772,10 @@ dpif_netlink_recv__(struct dpif_netlink *dpif, uint32_t handler_id,
>  
>      while (handler->event_offset < handler->n_events) {
>          int idx = handler->epoll_events[handler->event_offset].data.u32;
> -        struct dpif_channel *ch = &dpif->channels[idx];
> +        struct dpif_channel *ch = dpif_handler_channel_find(handler, idx);
> +        if (!ch) {
> +            return EAGAIN;
> +        }
>  
>          handler->event_offset++;
>  
> @@ -2845,12 +2879,14 @@ dpif_netlink_recv_purge__(struct dpif_netlink *dpif)
>      if (dpif->handlers) {
>          size_t i;
>  
> -        if (!dpif->channels[0].sock) {
> -            return;
> -        }
> -        for (i = 0; i < dpif->uc_array_size; i++ ) {
> +        for (i = 0; i < dpif->n_handlers; i++) {
> +            struct dpif_handler *handler = &dpif->handlers[i];
> +            struct dpif_channel *channel;
> +
> +            HMAP_FOR_EACH (channel, hmap_node, &handler->channels) {
> +                nl_sock_drain(channel->sock);
> +            }
>  
> -            nl_sock_drain(dpif->channels[i].sock);
>          }
>      }
>  }
> -- 
> 2.26.2
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Mark Gray Sept. 8, 2020, 3:45 p.m. UTC | #2
On 07/09/2020 23:31, Flavio Leitner wrote:
> 
> Hi Mark,
> 
> Thanks again for the patch.
> 
> I think the id-pool patch could be merged to this one since we
> usually merge the API change with its new user in the same patch.
> 
> See my other comments inline
> 
> 

Done. Ill remove my cover letter aswell :D.

> On Mon, Sep 07, 2020 at 09:37:55AM +0100, Mark Gray wrote:
>> From: Aaron Conole <aconole@redhat.com>
>>
>> Currently, the channel handlers are polled globally.  On some
>> systems, this causes a thundering herd issue where multiple
>> handler threads become active, only to do no work and immediately
>> sleep.
>>
>> The approach here is to push the netlink socket channels to discreet
>> handler threads to process, rather than polling on every thread.
>> This will eliminate the need to wake multiple threads.
>>
>> To check:
>>
>>   ip netns add left
>>   ip netns add right
>>   ip link add center-left type veth peer name left0 netns left
>>   ip link add center-right type veth peer name right0 netns right
>>   ip link set center-left up
>>   ip link set center-right up
>>   ip -n left ip link set left0 up
>>   ip -n left ip addr add 172.31.110.10/24 dev left0
>>   ip -n right ip link set right0 up
>>   ip -n right ip addr add 172.31.110.11/24 dev right0
>>
>>   ovs-vsctl add-br br0
>>   ovs-vsctl add-port br0 center-right
>>   ovs-vsctl add-port br0 center-left
>>
>>   # in one terminal
>>   perf record -e sched:sched_wakeup,irq:softirq_entry -ag
>>
>>   # in a separate terminal
>>   ip netns exec left arping -I left0 -c 1 172.31.110.11
>>
>>   # in the perf terminal after exiting
>>   perf script
>>
>> Look for the number of 'handler' threads which were made active.
>>
>> Suggested-by: Ben Pfaff <blp@ovn.org>
>> Co-authored-by: Mark Gray <mark.d.gray@redhat.com>
>> Reported-by: David Ahern <dsahern@gmail.com>
>> Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2019-December/365857.html
>> Cc: Matteo Croce <technoboy85@gmail.com>
>> Cc: Flavio Leitner <fbl@sysclose.org>
>> Fixes: 69c51582f ("dpif-netlink: don't allocate per thread netlink sockets")
>> Signed-off-by: Aaron Conole <aconole@redhat.com>
>> Signed-off-by: Mark Gray <mark.d.gray@redhat.com>
>> ---
>>  lib/dpif-netlink.c | 320 +++++++++++++++++++++++++--------------------
>>  1 file changed, 178 insertions(+), 142 deletions(-)
>>
>> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
>> index 7da4fb54d..273083fcc 100644
>> --- a/lib/dpif-netlink.c
>> +++ b/lib/dpif-netlink.c
>> @@ -37,6 +37,7 @@
>>  #include "dpif-provider.h"
>>  #include "fat-rwlock.h"
>>  #include "flow.h"
>> +#include "id-pool.h"
>>  #include "netdev-linux.h"
>>  #include "netdev-offload.h"
>>  #include "netdev-provider.h"
>> @@ -160,6 +161,9 @@ static void dpif_netlink_flow_to_dpif_flow(struct dpif_flow *,
>>  
>>  /* One of the dpif channels between the kernel and userspace. */
>>  struct dpif_channel {
>> +    struct hmap_node hmap_node;
>> +    struct dpif_handler *handler;
>> +    uint32_t port_idx;
>>      struct nl_sock *sock;       /* Netlink socket. */
>>      long long int last_poll;    /* Last time this channel was polled. */
>>  };
>> @@ -183,6 +187,8 @@ struct dpif_handler {
>>      int n_events;                 /* Num events returned by epoll_wait(). */
>>      int event_offset;             /* Offset into 'epoll_events'. */
>>  
>> +    struct hmap channels;         /* map of channels for each port. */
>> +
>>  #ifdef _WIN32
>>      /* Pool of sockets. */
>>      struct dpif_windows_vport_sock *vport_sock_pool;
>> @@ -201,9 +207,8 @@ struct dpif_netlink {
>>      struct fat_rwlock upcall_lock;
>>      struct dpif_handler *handlers;
>>      uint32_t n_handlers;           /* Num of upcall handlers. */
>> -    struct dpif_channel *channels; /* Array of channels for each port. */
>> -    int uc_array_size;             /* Size of 'handler->channels' and */
>> -                                   /* 'handler->epoll_events'. */
>> +
>> +    struct id_pool *port_ids;      /* Set of added port ids */
>>  
>>      /* Change notification. */
>>      struct nl_sock *port_notifier; /* vport multicast group subscriber. */
>> @@ -287,6 +292,27 @@ close_nl_sock(struct nl_sock *sock)
>>  #endif
>>  }
>>  
>> +static size_t
>> +dpif_handler_channels_count(const struct dpif_handler *handler)
>> +{
>> +    return hmap_count(&handler->channels);
>> +}
>> +
>> +static struct dpif_channel *
>> +dpif_handler_channel_find(struct dpif_handler *handler, uint32_t idx)
>> +{
>> +    struct dpif_channel *channel;
>> +
>> +    HMAP_FOR_EACH_WITH_HASH (channel, hmap_node, hash_int(idx, 0),
>> +                             &handler->channels) {
>> +        if (channel->port_idx == idx) {
>> +            return channel;
>> +        }
>> +    }
>> +
>> +    return NULL;
>> +}
>> +
>>  static struct dpif_netlink *
>>  dpif_netlink_cast(const struct dpif *dpif)
>>  {
>> @@ -385,6 +411,8 @@ open_dpif(const struct dpif_netlink_dp *dp, struct dpif **dpifp)
>>  
>>      dpif->dp_ifindex = dp->dp_ifindex;
>>      dpif->user_features = dp->user_features;
>> +    dpif->port_ids = id_pool_create(0, MAX_PORTS);
>> +
> 
> Extra line not needed.

Removed

> 
>>      *dpifp = &dpif->dpif;
>>  
>>      return 0;
>> @@ -452,161 +480,140 @@ static bool
>>  vport_get_pid(struct dpif_netlink *dpif, uint32_t port_idx,
>>                uint32_t *upcall_pid)
>>  {
>> -    /* Since the nl_sock can only be assigned in either all
>> -     * or none "dpif" channels, the following check
>> -     * would suffice. */
>> -    if (!dpif->channels[port_idx].sock) {
>> +    size_t i;
> 
> New empty line needed.

Added

> 
>> +    ovs_assert(!WINDOWS || dpif->n_handlers <= 1);
>> +
>> +    if (!id_pool_has_id(dpif->port_ids, port_idx)) {
>>          return false;
>>      }
>> -    ovs_assert(!WINDOWS || dpif->n_handlers <= 1);
>>  
>> -    *upcall_pid = nl_sock_pid(dpif->channels[port_idx].sock);
>> +    /* The 'port_idx' should only be valid for a single handler. */
>> +    for (i = 0; i < dpif->n_handlers; i++) {
>> +        struct dpif_channel *channel =
>> +            dpif_handler_channel_find(&dpif->handlers[i], port_idx);
> 
> Perhaps:
>        for (i = 0; i < dpif->n_handlers; i++) {
>            struct dpif_channel *channel;
>  
>            channel = dpif_handler_channel_find(&dpif->handlers[i], port_idx);
>        [...]
> 

To improve readability? - Done

>>  
>> -    return true;
>> +        if (channel) {
>> +            *upcall_pid = nl_sock_pid(channel->sock);
>> +            return true;
>> +        }
>> +    }
>> +
>> +    return false;
>>  }
>>  
>>  static int
>>  vport_add_channel(struct dpif_netlink *dpif, odp_port_t port_no,
>>                    struct nl_sock *sock)
>>  {
>> -    struct epoll_event event;
>>      uint32_t port_idx = odp_to_u32(port_no);
>> +    struct dpif_channel *channel;
>> +    struct dpif_handler *handler;
>> +    struct epoll_event event;
>>      size_t i;
>> -    int error;
>>  
>> -    if (dpif->handlers == NULL) {
>> +    if (dpif->handlers == NULL ||
>> +        id_pool_has_id(dpif->port_ids, port_idx)) {
>>          close_nl_sock(sock);
>> -        return 0;
>> +        return EINVAL;
>>      }
>>  
>> -    /* We assume that the datapath densely chooses port numbers, which can
>> -     * therefore be used as an index into 'channels' and 'epoll_events' of
>> -     * 'dpif'. */
>> -    if (port_idx >= dpif->uc_array_size) {
>> -        uint32_t new_size = port_idx + 1;
>> -
>> -        if (new_size > MAX_PORTS) {
>> -            VLOG_WARN_RL(&error_rl, "%s: datapath port %"PRIu32" too big",
>> -                         dpif_name(&dpif->dpif), port_no);
>> -            return EFBIG;
>> -        }
>> +    if (port_idx >= MAX_PORTS) {
>> +        VLOG_WARN_RL(&error_rl, "%s: datapath port %"PRIu32" too big",
>> +                     dpif_name(&dpif->dpif), port_no);
>> +        return EFBIG;
>> +    }
>>  
>> -        dpif->channels = xrealloc(dpif->channels,
>> -                                  new_size * sizeof *dpif->channels);
>> +    handler = &dpif->handlers[0];
>>  
>> -        for (i = dpif->uc_array_size; i < new_size; i++) {
>> -            dpif->channels[i].sock = NULL;
>> +    for (i = 0; i < dpif->n_handlers; i++) {
>> +        if (dpif_handler_channels_count(&dpif->handlers[i]) <
>> +            dpif_handler_channels_count(handler)) {
>> +            handler = &dpif->handlers[i];
>>          }
>> +    }
>>  
>> -        for (i = 0; i < dpif->n_handlers; i++) {
>> -            struct dpif_handler *handler = &dpif->handlers[i];
>> -
>> -            handler->epoll_events = xrealloc(handler->epoll_events,
>> -                new_size * sizeof *handler->epoll_events);
>> +    channel = xmalloc(sizeof *channel);
>> +    channel->port_idx = port_idx;
>> +    channel->sock = sock;
>> +    channel->last_poll = LLONG_MIN;
>> +    channel->handler = handler;
>> +    hmap_insert(&handler->channels, &channel->hmap_node,
>> +                hash_int(port_idx, 0));
>>  
>> -        }
>> -        dpif->uc_array_size = new_size;
>> -    }
>> +    handler->epoll_events = xrealloc(handler->epoll_events,
>> +                                     dpif_handler_channels_count(handler) *
>> +                                     sizeof *handler->epoll_events);
>>  
>>      memset(&event, 0, sizeof event);
>>      event.events = EPOLLIN | EPOLLEXCLUSIVE;
>>      event.data.u32 = port_idx;
>>  
>> -    for (i = 0; i < dpif->n_handlers; i++) {
>> -        struct dpif_handler *handler = &dpif->handlers[i];
>> -
>> -#ifndef _WIN32
>> -        if (epoll_ctl(handler->epoll_fd, EPOLL_CTL_ADD, nl_sock_fd(sock),
>> -                      &event) < 0) {
>> -            error = errno;
>> -            goto error;
>> -        }
>> -#endif
>> -    }
>> -    dpif->channels[port_idx].sock = sock;
>> -    dpif->channels[port_idx].last_poll = LLONG_MIN;
>> -
>> -    return 0;
>> -
>> -error:
>>  #ifndef _WIN32
>> -    while (i--) {
>> -        epoll_ctl(dpif->handlers[i].epoll_fd, EPOLL_CTL_DEL,
>> -                  nl_sock_fd(sock), NULL);
>> +    if (epoll_ctl(handler->epoll_fd, EPOLL_CTL_ADD, nl_sock_fd(sock),
>> +                  &event) < 0) {
>> +        hmap_remove(&handler->channels, &channel->hmap_node);
>> +        free(channel);
>> +        id_pool_free_id(dpif->port_ids, port_idx);
>> +        return errno;
>>      }
>>  #endif
>> -    dpif->channels[port_idx].sock = NULL;
>>  
>> -    return error;
>> +    id_pool_add(dpif->port_ids, port_idx);
>> +    return 0;
>>  }
>>  
>>  static void
>> -vport_del_channels(struct dpif_netlink *dpif, odp_port_t port_no)
>> +vport_del_channel(struct dpif_netlink *dpif,
>> +                  struct dpif_channel *channel)
>>  {
>> -    uint32_t port_idx = odp_to_u32(port_no);
>> -    size_t i;
>> -
>> -    if (!dpif->handlers || port_idx >= dpif->uc_array_size
>> -        || !dpif->channels[port_idx].sock) {
>> -        return;
>> -    }
>> -
>> -    for (i = 0; i < dpif->n_handlers; i++) {
>> -        struct dpif_handler *handler = &dpif->handlers[i];
>> +    if (channel) {
>>  #ifndef _WIN32
>> -        epoll_ctl(handler->epoll_fd, EPOLL_CTL_DEL,
>> -                  nl_sock_fd(dpif->channels[port_idx].sock), NULL);
>> +        epoll_ctl(channel->handler->epoll_fd, EPOLL_CTL_DEL,
>> +                  nl_sock_fd(channel->sock), NULL);
>> +        nl_sock_destroy(channel->sock);
>>  #endif
>> -        handler->event_offset = handler->n_events = 0;
>> +        hmap_remove(&channel->handler->channels, &channel->hmap_node);
>> +        id_pool_free_id(dpif->port_ids, channel->port_idx);
>> +        channel->handler->event_offset = channel->handler->n_events = 0;
>> +        free(channel);
>>      }
>> -#ifndef _WIN32
>> -    nl_sock_destroy(dpif->channels[port_idx].sock);
>> -#endif
>> -    dpif->channels[port_idx].sock = NULL;
>>  }
> 
> 
> The above function could be simplified since almost all callers
> check if channel is not null. The only one that does not check
> should do it to break earlier. It's noted below.
> 
> Also, the double dereferencing can be removed.

Made all these suggested changes.

> 
> 
> static void
> vport_del_channel(struct dpif_netlink *dpif,
>                   struct dpif_channel *channel)
>     OVS_REQ_WRLOCK(dpif->upcall_lock)
> {
>     struct dpif_handler *handler = channel->handler;
> 
> #ifndef _WIN32
>     epoll_ctl(handler->epoll_fd, EPOLL_CTL_DEL, nl_sock_fd(channel->sock),
>               NULL);
>     nl_sock_destroy(channel->sock);
> #endif
>     hmap_remove(&handler->channels, &channel->hmap_node);
>     id_pool_free_id(dpif->port_ids, channel->port_idx);
>     handler->event_offset = handler->n_events = 0;
>     free(channel);
> }
> 
> 
> 
>>  
>>  static void
>>  destroy_all_channels(struct dpif_netlink *dpif)
>>      OVS_REQ_WRLOCK(dpif->upcall_lock)
>>  {
>> -    unsigned int i;
>> +    size_t i;
>>  
>>      if (!dpif->handlers) {
>>          return;
>>      }
>>  
>> -    for (i = 0; i < dpif->uc_array_size; i++ ) {
>> -        struct dpif_netlink_vport vport_request;
>> -        uint32_t upcall_pids = 0;
>> -
>> -        if (!dpif->channels[i].sock) {
>> -            continue;
>> -        }
>> -
>> -        /* Turn off upcalls. */
>> -        dpif_netlink_vport_init(&vport_request);
>> -        vport_request.cmd = OVS_VPORT_CMD_SET;
>> -        vport_request.dp_ifindex = dpif->dp_ifindex;
>> -        vport_request.port_no = u32_to_odp(i);
>> -        vport_request.n_upcall_pids = 1;
>> -        vport_request.upcall_pids = &upcall_pids;
>> -        dpif_netlink_vport_transact(&vport_request, NULL, NULL);
>> -
>> -        vport_del_channels(dpif, u32_to_odp(i));
>> -    }
>> -
>>      for (i = 0; i < dpif->n_handlers; i++) {
>>          struct dpif_handler *handler = &dpif->handlers[i];
>> +        struct dpif_channel *channel, *next;
>> +
>> +        HMAP_FOR_EACH_SAFE (channel, next, hmap_node, &handler->channels) {
>> +            struct dpif_netlink_vport vport_request;
>> +            uint32_t upcall_pids = 0;
>> +            /* Turn off upcalls. */
>> +            dpif_netlink_vport_init(&vport_request);
>> +            vport_request.cmd = OVS_VPORT_CMD_SET;
>> +            vport_request.dp_ifindex = dpif->dp_ifindex;
>> +            vport_request.port_no = u32_to_odp(channel->port_idx);
>> +            vport_request.n_upcall_pids = 1;
>> +            vport_request.upcall_pids = &upcall_pids;
>> +            dpif_netlink_vport_transact(&vport_request, NULL, NULL);
>> +
> 
> Extra line not needed.

Removed
> 
>> +            vport_del_channel(dpif, channel);
>> +        }
>>  
>>          dpif_netlink_handler_uninit(handler);
>>          free(handler->epoll_events);
>>      }
>> -    free(dpif->channels);
>> -    free(dpif->handlers);
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> 
> I think it is leaking dpif->handlers, correct?

Yes, added in the free() again.

> 
> 
>> +
>>      dpif->handlers = NULL;
>> -    dpif->channels = NULL;
>>      dpif->n_handlers = 0;
>> -    dpif->uc_array_size = 0;
>>  }
>>  
>>  static void
>> @@ -618,9 +625,11 @@ dpif_netlink_close(struct dpif *dpif_)
>>  
>>      fat_rwlock_wrlock(&dpif->upcall_lock);
>>      destroy_all_channels(dpif);
>> +    id_pool_destroy(dpif->port_ids);
>>      fat_rwlock_unlock(&dpif->upcall_lock);
>>  
>>      fat_rwlock_destroy(&dpif->upcall_lock);
>> +
>>      free(dpif);
>>  }
>>  
>> @@ -981,8 +990,10 @@ dpif_netlink_port_del__(struct dpif_netlink *dpif, odp_port_t port_no)
>>      OVS_REQ_WRLOCK(dpif->upcall_lock)
>>  {
>>      struct dpif_netlink_vport vport;
>> +    struct dpif_channel *channel = NULL;
>>      struct dpif_port dpif_port;
>>      int error;
>> +    size_t i;
>>  
>>      error = dpif_netlink_port_query__(dpif, port_no, NULL, &dpif_port);
>>      if (error) {
>> @@ -1003,7 +1014,18 @@ dpif_netlink_port_del__(struct dpif_netlink *dpif, odp_port_t port_no)
>>  #endif
>>      error = dpif_netlink_vport_transact(&vport, NULL, NULL);
>>  
>> -    vport_del_channels(dpif, port_no);
>> +    for (i = 0; i < dpif->n_handlers; i++) {
>> +        channel = dpif_handler_channel_find(&dpif->handlers[i],
>> +                                            odp_to_u32(port_no));
>> +        if (channel) {
>> +            vport_del_channel(dpif, channel);
>> +            break;
>> +        }
>> +    }
>> +
>> +    if (!channel) {
>> +        return EINVAL;
>> +    }
>>  
>>      if (!error && !ovs_tunnels_out_of_tree) {
>>          error = dpif_netlink_rtnl_port_destroy(dpif_port.name, dpif_port.type);
>> @@ -1084,23 +1106,39 @@ dpif_netlink_port_get_pid__(const struct dpif_netlink *dpif,
>>                              odp_port_t port_no)
>>      OVS_REQ_RDLOCK(dpif->upcall_lock)
>>  {
>> +    struct dpif_channel *min_channel = NULL, *found_channel = NULL;
>>      uint32_t port_idx = odp_to_u32(port_no);
>>      uint32_t pid = 0;
>> +    size_t i;
>> +
>> +    /* The ODPP_NONE "reserved" port number uses the "ovs-system"'s
>> +     * channel, since it is not heavily loaded. */
>> +    uint32_t idx = port_no == ODPP_NONE ? 0 : port_idx;
>>  
>> -    if (dpif->handlers && dpif->uc_array_size > 0) {
>> -        /* The ODPP_NONE "reserved" port number uses the "ovs-system"'s
>> -         * channel, since it is not heavily loaded. */
>> -        uint32_t idx = port_idx >= dpif->uc_array_size ? 0 : port_idx;
>> -
>> -        /* Needs to check in case the socket pointer is changed in between
>> -         * the holding of upcall_lock.  A known case happens when the main
>> -         * thread deletes the vport while the handler thread is handling
>> -         * the upcall from that port. */
>> -        if (dpif->channels[idx].sock) {
>> -            pid = nl_sock_pid(dpif->channels[idx].sock);
>> +    if (!id_pool_has_id(dpif->port_ids, port_idx)) {
>> +        return 0;
>> +    }
>> +
>> +    if (dpif->handlers) {
>> +        for (i = 0; i < dpif->n_handlers; i++) {
>> +            if (dpif_handler_channel_find(&dpif->handlers[i], idx)) {
>> +                found_channel = dpif_handler_channel_find(&dpif->handlers[i],
>> +                                                          idx);
>> +                break;
>> +            }
>> +
>> +            if (dpif_handler_channel_find(&dpif->handlers[i], 0)) {
>> +                min_channel = dpif_handler_channel_find(&dpif->handlers[i], 0);
>> +            }
>>          }
>>      }
>>  
>> +    if (found_channel) {
>> +        pid = nl_sock_pid(found_channel->sock);
>> +    } else if (min_channel) {
>> +        pid = nl_sock_pid(min_channel->sock);
>> +    }
>> +
>>      return pid;
>>  }
>>  
>> @@ -2342,12 +2380,14 @@ dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops,
>>  static void
>>  dpif_netlink_handler_uninit(struct dpif_handler *handler)
>>  {
>> +    hmap_destroy(&handler->channels);
>>      vport_delete_sock_pool(handler);
>>  }
>>  
>>  static int
>>  dpif_netlink_handler_init(struct dpif_handler *handler)
>>  {
>> +    hmap_init(&handler->channels);
>>      return vport_create_sock_pool(handler);
>>  }
>>  #else
>> @@ -2355,6 +2395,7 @@ dpif_netlink_handler_init(struct dpif_handler *handler)
>>  static int
>>  dpif_netlink_handler_init(struct dpif_handler *handler)
>>  {
>> +    hmap_init(&handler->channels);
>>      handler->epoll_fd = epoll_create(10);
>>      return handler->epoll_fd < 0 ? errno : 0;
>>  }
>> @@ -2362,6 +2403,7 @@ dpif_netlink_handler_init(struct dpif_handler *handler)
>>  static void
>>  dpif_netlink_handler_uninit(struct dpif_handler *handler)
>>  {
>> +    hmap_destroy(&handler->channels);
>>      close(handler->epoll_fd);
>>  }
>>  #endif
>> @@ -2374,12 +2416,11 @@ static int
>>  dpif_netlink_refresh_channels(struct dpif_netlink *dpif, uint32_t n_handlers)
>>      OVS_REQ_WRLOCK(dpif->upcall_lock)
>>  {
>> -    unsigned long int *keep_channels;
>>      struct dpif_netlink_vport vport;
>> -    size_t keep_channels_nbits;
>>      struct nl_dump dump;
>>      uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
>>      struct ofpbuf buf;
>> +    struct dpif_channel *channel;
>>      int retval = 0;
>>      size_t i;
>>  
>> @@ -2412,13 +2453,9 @@ dpif_netlink_refresh_channels(struct dpif_netlink *dpif, uint32_t n_handlers)
>>  
>>      for (i = 0; i < n_handlers; i++) {
>>          struct dpif_handler *handler = &dpif->handlers[i];
>> -
>>          handler->event_offset = handler->n_events = 0;
>>      }
>>  
>> -    keep_channels_nbits = dpif->uc_array_size;
>> -    keep_channels = bitmap_allocate(keep_channels_nbits);
>> -
>>      ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
>>      dpif_netlink_port_dump_start__(dpif, &dump);
>>      while (!dpif_netlink_port_dump_next__(dpif, &dump, &vport, &buf)) {
>> @@ -2426,8 +2463,7 @@ dpif_netlink_refresh_channels(struct dpif_netlink *dpif, uint32_t n_handlers)
>>          uint32_t upcall_pid;
>>          int error;
>>  
>> -        if (port_no >= dpif->uc_array_size
>> -            || !vport_get_pid(dpif, port_no, &upcall_pid)) {
>> +        if (!vport_get_pid(dpif, port_no, &upcall_pid)) {
>>              struct nl_sock *sock;
>>              error = create_nl_sock(dpif, &sock);
>>  
>> @@ -2475,25 +2511,17 @@ dpif_netlink_refresh_channels(struct dpif_netlink *dpif, uint32_t n_handlers)
>>              }
>>          }
>>  
>> -        if (port_no < keep_channels_nbits) {
>> -            bitmap_set1(keep_channels, port_no);
>> -        }
>>          continue;
>>  
>>      error:
>> -        vport_del_channels(dpif, vport.port_no);
>> +        for (i = 0; i < n_handlers; i++) {
>> +            channel = dpif_handler_channel_find(&dpif->handlers[i], port_no);
> 
> Here is the place where it doesn't check if channel is NULL.
> 
>> +            vport_del_channel(dpif, channel);
>> +        }
> 
> It could do instead:
> 
>         for (i = 0; i < n_handlers; i++) {
>             channel = dpif_handler_channel_find(&dpif->handlers[i], port_no);
>             if (channel) {
>                 vport_del_channel(dpif, channel);
>                 break;
>             }
>         }
> 
> which is similar in dpif_netlink_port_del__() and perhaps could
> become a function.

Yeah, I will do that. There is another one in destroy_all_channels but
channel should always != NULL. I'll put an assert there.
> 
> What do you think?
> 
> fbl
> 
> 
>>      }
>>      nl_dump_done(&dump);
>>      ofpbuf_uninit(&buf);
>>  
>> -    /* Discard any saved channels that we didn't reuse. */
>> -    for (i = 0; i < keep_channels_nbits; i++) {
>> -        if (!bitmap_is_set(keep_channels, i)) {
>> -            vport_del_channels(dpif, u32_to_odp(i));
>> -        }
>> -    }
>> -    free(keep_channels);
>> -
>>      return retval;
>>  }
>>  
>> @@ -2714,6 +2742,8 @@ dpif_netlink_recv__(struct dpif_netlink *dpif, uint32_t handler_id,
>>      OVS_REQ_RDLOCK(dpif->upcall_lock)
>>  {
>>      struct dpif_handler *handler;
>> +    size_t n_channels;
>> +
>>      int read_tries = 0;
>>  
>>      if (!dpif->handlers || handler_id >= dpif->n_handlers) {
>> @@ -2721,14 +2751,15 @@ dpif_netlink_recv__(struct dpif_netlink *dpif, uint32_t handler_id,
>>      }
>>  
>>      handler = &dpif->handlers[handler_id];
>> -    if (handler->event_offset >= handler->n_events) {
>> +    n_channels = dpif_handler_channels_count(handler);
>> +    if (handler->event_offset >= handler->n_events && n_channels) {
>>          int retval;
>>  
>>          handler->event_offset = handler->n_events = 0;
>>  
>>          do {
>>              retval = epoll_wait(handler->epoll_fd, handler->epoll_events,
>> -                                dpif->uc_array_size, 0);
>> +                                n_channels, 0);
>>          } while (retval < 0 && errno == EINTR);
>>  
>>          if (retval < 0) {
>> @@ -2741,7 +2772,10 @@ dpif_netlink_recv__(struct dpif_netlink *dpif, uint32_t handler_id,
>>  
>>      while (handler->event_offset < handler->n_events) {
>>          int idx = handler->epoll_events[handler->event_offset].data.u32;
>> -        struct dpif_channel *ch = &dpif->channels[idx];
>> +        struct dpif_channel *ch = dpif_handler_channel_find(handler, idx);
>> +        if (!ch) {
>> +            return EAGAIN;
>> +        }
>>  
>>          handler->event_offset++;
>>  
>> @@ -2845,12 +2879,14 @@ dpif_netlink_recv_purge__(struct dpif_netlink *dpif)
>>      if (dpif->handlers) {
>>          size_t i;
>>  
>> -        if (!dpif->channels[0].sock) {
>> -            return;
>> -        }
>> -        for (i = 0; i < dpif->uc_array_size; i++ ) {
>> +        for (i = 0; i < dpif->n_handlers; i++) {
>> +            struct dpif_handler *handler = &dpif->handlers[i];
>> +            struct dpif_channel *channel;
>> +
>> +            HMAP_FOR_EACH (channel, hmap_node, &handler->channels) {
>> +                nl_sock_drain(channel->sock);
>> +            }
>>  
>> -            nl_sock_drain(dpif->channels[i].sock);
>>          }
>>      }
>>  }
>> -- 
>> 2.26.2
>>
>> _______________________________________________
>> dev mailing list
>> dev@openvswitch.org
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
diff mbox series

Patch

diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 7da4fb54d..273083fcc 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -37,6 +37,7 @@ 
 #include "dpif-provider.h"
 #include "fat-rwlock.h"
 #include "flow.h"
+#include "id-pool.h"
 #include "netdev-linux.h"
 #include "netdev-offload.h"
 #include "netdev-provider.h"
@@ -160,6 +161,9 @@  static void dpif_netlink_flow_to_dpif_flow(struct dpif_flow *,
 
 /* One of the dpif channels between the kernel and userspace. */
 struct dpif_channel {
+    struct hmap_node hmap_node;
+    struct dpif_handler *handler;
+    uint32_t port_idx;
     struct nl_sock *sock;       /* Netlink socket. */
     long long int last_poll;    /* Last time this channel was polled. */
 };
@@ -183,6 +187,8 @@  struct dpif_handler {
     int n_events;                 /* Num events returned by epoll_wait(). */
     int event_offset;             /* Offset into 'epoll_events'. */
 
+    struct hmap channels;         /* map of channels for each port. */
+
 #ifdef _WIN32
     /* Pool of sockets. */
     struct dpif_windows_vport_sock *vport_sock_pool;
@@ -201,9 +207,8 @@  struct dpif_netlink {
     struct fat_rwlock upcall_lock;
     struct dpif_handler *handlers;
     uint32_t n_handlers;           /* Num of upcall handlers. */
-    struct dpif_channel *channels; /* Array of channels for each port. */
-    int uc_array_size;             /* Size of 'handler->channels' and */
-                                   /* 'handler->epoll_events'. */
+
+    struct id_pool *port_ids;      /* Set of added port ids */
 
     /* Change notification. */
     struct nl_sock *port_notifier; /* vport multicast group subscriber. */
@@ -287,6 +292,27 @@  close_nl_sock(struct nl_sock *sock)
 #endif
 }
 
+static size_t
+dpif_handler_channels_count(const struct dpif_handler *handler)
+{
+    return hmap_count(&handler->channels);
+}
+
+static struct dpif_channel *
+dpif_handler_channel_find(struct dpif_handler *handler, uint32_t idx)
+{
+    struct dpif_channel *channel;
+
+    HMAP_FOR_EACH_WITH_HASH (channel, hmap_node, hash_int(idx, 0),
+                             &handler->channels) {
+        if (channel->port_idx == idx) {
+            return channel;
+        }
+    }
+
+    return NULL;
+}
+
 static struct dpif_netlink *
 dpif_netlink_cast(const struct dpif *dpif)
 {
@@ -385,6 +411,8 @@  open_dpif(const struct dpif_netlink_dp *dp, struct dpif **dpifp)
 
     dpif->dp_ifindex = dp->dp_ifindex;
     dpif->user_features = dp->user_features;
+    dpif->port_ids = id_pool_create(0, MAX_PORTS);
+
     *dpifp = &dpif->dpif;
 
     return 0;
@@ -452,161 +480,140 @@  static bool
 vport_get_pid(struct dpif_netlink *dpif, uint32_t port_idx,
               uint32_t *upcall_pid)
 {
-    /* Since the nl_sock can only be assigned in either all
-     * or none "dpif" channels, the following check
-     * would suffice. */
-    if (!dpif->channels[port_idx].sock) {
+    size_t i;
+    ovs_assert(!WINDOWS || dpif->n_handlers <= 1);
+
+    if (!id_pool_has_id(dpif->port_ids, port_idx)) {
         return false;
     }
-    ovs_assert(!WINDOWS || dpif->n_handlers <= 1);
 
-    *upcall_pid = nl_sock_pid(dpif->channels[port_idx].sock);
+    /* The 'port_idx' should only be valid for a single handler. */
+    for (i = 0; i < dpif->n_handlers; i++) {
+        struct dpif_channel *channel =
+            dpif_handler_channel_find(&dpif->handlers[i], port_idx);
 
-    return true;
+        if (channel) {
+            *upcall_pid = nl_sock_pid(channel->sock);
+            return true;
+        }
+    }
+
+    return false;
 }
 
 static int
 vport_add_channel(struct dpif_netlink *dpif, odp_port_t port_no,
                   struct nl_sock *sock)
 {
-    struct epoll_event event;
     uint32_t port_idx = odp_to_u32(port_no);
+    struct dpif_channel *channel;
+    struct dpif_handler *handler;
+    struct epoll_event event;
     size_t i;
-    int error;
 
-    if (dpif->handlers == NULL) {
+    if (dpif->handlers == NULL ||
+        id_pool_has_id(dpif->port_ids, port_idx)) {
         close_nl_sock(sock);
-        return 0;
+        return EINVAL;
     }
 
-    /* We assume that the datapath densely chooses port numbers, which can
-     * therefore be used as an index into 'channels' and 'epoll_events' of
-     * 'dpif'. */
-    if (port_idx >= dpif->uc_array_size) {
-        uint32_t new_size = port_idx + 1;
-
-        if (new_size > MAX_PORTS) {
-            VLOG_WARN_RL(&error_rl, "%s: datapath port %"PRIu32" too big",
-                         dpif_name(&dpif->dpif), port_no);
-            return EFBIG;
-        }
+    if (port_idx >= MAX_PORTS) {
+        VLOG_WARN_RL(&error_rl, "%s: datapath port %"PRIu32" too big",
+                     dpif_name(&dpif->dpif), port_no);
+        return EFBIG;
+    }
 
-        dpif->channels = xrealloc(dpif->channels,
-                                  new_size * sizeof *dpif->channels);
+    handler = &dpif->handlers[0];
 
-        for (i = dpif->uc_array_size; i < new_size; i++) {
-            dpif->channels[i].sock = NULL;
+    for (i = 0; i < dpif->n_handlers; i++) {
+        if (dpif_handler_channels_count(&dpif->handlers[i]) <
+            dpif_handler_channels_count(handler)) {
+            handler = &dpif->handlers[i];
         }
+    }
 
-        for (i = 0; i < dpif->n_handlers; i++) {
-            struct dpif_handler *handler = &dpif->handlers[i];
-
-            handler->epoll_events = xrealloc(handler->epoll_events,
-                new_size * sizeof *handler->epoll_events);
+    channel = xmalloc(sizeof *channel);
+    channel->port_idx = port_idx;
+    channel->sock = sock;
+    channel->last_poll = LLONG_MIN;
+    channel->handler = handler;
+    hmap_insert(&handler->channels, &channel->hmap_node,
+                hash_int(port_idx, 0));
 
-        }
-        dpif->uc_array_size = new_size;
-    }
+    handler->epoll_events = xrealloc(handler->epoll_events,
+                                     dpif_handler_channels_count(handler) *
+                                     sizeof *handler->epoll_events);
 
     memset(&event, 0, sizeof event);
     event.events = EPOLLIN | EPOLLEXCLUSIVE;
     event.data.u32 = port_idx;
 
-    for (i = 0; i < dpif->n_handlers; i++) {
-        struct dpif_handler *handler = &dpif->handlers[i];
-
-#ifndef _WIN32
-        if (epoll_ctl(handler->epoll_fd, EPOLL_CTL_ADD, nl_sock_fd(sock),
-                      &event) < 0) {
-            error = errno;
-            goto error;
-        }
-#endif
-    }
-    dpif->channels[port_idx].sock = sock;
-    dpif->channels[port_idx].last_poll = LLONG_MIN;
-
-    return 0;
-
-error:
 #ifndef _WIN32
-    while (i--) {
-        epoll_ctl(dpif->handlers[i].epoll_fd, EPOLL_CTL_DEL,
-                  nl_sock_fd(sock), NULL);
+    if (epoll_ctl(handler->epoll_fd, EPOLL_CTL_ADD, nl_sock_fd(sock),
+                  &event) < 0) {
+        hmap_remove(&handler->channels, &channel->hmap_node);
+        free(channel);
+        id_pool_free_id(dpif->port_ids, port_idx);
+        return errno;
     }
 #endif
-    dpif->channels[port_idx].sock = NULL;
 
-    return error;
+    id_pool_add(dpif->port_ids, port_idx);
+    return 0;
 }
 
 static void
-vport_del_channels(struct dpif_netlink *dpif, odp_port_t port_no)
+vport_del_channel(struct dpif_netlink *dpif,
+                  struct dpif_channel *channel)
 {
-    uint32_t port_idx = odp_to_u32(port_no);
-    size_t i;
-
-    if (!dpif->handlers || port_idx >= dpif->uc_array_size
-        || !dpif->channels[port_idx].sock) {
-        return;
-    }
-
-    for (i = 0; i < dpif->n_handlers; i++) {
-        struct dpif_handler *handler = &dpif->handlers[i];
+    if (channel) {
 #ifndef _WIN32
-        epoll_ctl(handler->epoll_fd, EPOLL_CTL_DEL,
-                  nl_sock_fd(dpif->channels[port_idx].sock), NULL);
+        epoll_ctl(channel->handler->epoll_fd, EPOLL_CTL_DEL,
+                  nl_sock_fd(channel->sock), NULL);
+        nl_sock_destroy(channel->sock);
 #endif
-        handler->event_offset = handler->n_events = 0;
+        hmap_remove(&channel->handler->channels, &channel->hmap_node);
+        id_pool_free_id(dpif->port_ids, channel->port_idx);
+        channel->handler->event_offset = channel->handler->n_events = 0;
+        free(channel);
     }
-#ifndef _WIN32
-    nl_sock_destroy(dpif->channels[port_idx].sock);
-#endif
-    dpif->channels[port_idx].sock = NULL;
 }
 
 static void
 destroy_all_channels(struct dpif_netlink *dpif)
     OVS_REQ_WRLOCK(dpif->upcall_lock)
 {
-    unsigned int i;
+    size_t i;
 
     if (!dpif->handlers) {
         return;
     }
 
-    for (i = 0; i < dpif->uc_array_size; i++ ) {
-        struct dpif_netlink_vport vport_request;
-        uint32_t upcall_pids = 0;
-
-        if (!dpif->channels[i].sock) {
-            continue;
-        }
-
-        /* Turn off upcalls. */
-        dpif_netlink_vport_init(&vport_request);
-        vport_request.cmd = OVS_VPORT_CMD_SET;
-        vport_request.dp_ifindex = dpif->dp_ifindex;
-        vport_request.port_no = u32_to_odp(i);
-        vport_request.n_upcall_pids = 1;
-        vport_request.upcall_pids = &upcall_pids;
-        dpif_netlink_vport_transact(&vport_request, NULL, NULL);
-
-        vport_del_channels(dpif, u32_to_odp(i));
-    }
-
     for (i = 0; i < dpif->n_handlers; i++) {
         struct dpif_handler *handler = &dpif->handlers[i];
+        struct dpif_channel *channel, *next;
+
+        HMAP_FOR_EACH_SAFE (channel, next, hmap_node, &handler->channels) {
+            struct dpif_netlink_vport vport_request;
+            uint32_t upcall_pids = 0;
+            /* Turn off upcalls. */
+            dpif_netlink_vport_init(&vport_request);
+            vport_request.cmd = OVS_VPORT_CMD_SET;
+            vport_request.dp_ifindex = dpif->dp_ifindex;
+            vport_request.port_no = u32_to_odp(channel->port_idx);
+            vport_request.n_upcall_pids = 1;
+            vport_request.upcall_pids = &upcall_pids;
+            dpif_netlink_vport_transact(&vport_request, NULL, NULL);
+
+            vport_del_channel(dpif, channel);
+        }
 
         dpif_netlink_handler_uninit(handler);
         free(handler->epoll_events);
     }
-    free(dpif->channels);
-    free(dpif->handlers);
+
     dpif->handlers = NULL;
-    dpif->channels = NULL;
     dpif->n_handlers = 0;
-    dpif->uc_array_size = 0;
 }
 
 static void
@@ -618,9 +625,11 @@  dpif_netlink_close(struct dpif *dpif_)
 
     fat_rwlock_wrlock(&dpif->upcall_lock);
     destroy_all_channels(dpif);
+    id_pool_destroy(dpif->port_ids);
     fat_rwlock_unlock(&dpif->upcall_lock);
 
     fat_rwlock_destroy(&dpif->upcall_lock);
+
     free(dpif);
 }
 
@@ -981,8 +990,10 @@  dpif_netlink_port_del__(struct dpif_netlink *dpif, odp_port_t port_no)
     OVS_REQ_WRLOCK(dpif->upcall_lock)
 {
     struct dpif_netlink_vport vport;
+    struct dpif_channel *channel = NULL;
     struct dpif_port dpif_port;
     int error;
+    size_t i;
 
     error = dpif_netlink_port_query__(dpif, port_no, NULL, &dpif_port);
     if (error) {
@@ -1003,7 +1014,18 @@  dpif_netlink_port_del__(struct dpif_netlink *dpif, odp_port_t port_no)
 #endif
     error = dpif_netlink_vport_transact(&vport, NULL, NULL);
 
-    vport_del_channels(dpif, port_no);
+    for (i = 0; i < dpif->n_handlers; i++) {
+        channel = dpif_handler_channel_find(&dpif->handlers[i],
+                                            odp_to_u32(port_no));
+        if (channel) {
+            vport_del_channel(dpif, channel);
+            break;
+        }
+    }
+
+    if (!channel) {
+        return EINVAL;
+    }
 
     if (!error && !ovs_tunnels_out_of_tree) {
         error = dpif_netlink_rtnl_port_destroy(dpif_port.name, dpif_port.type);
@@ -1084,23 +1106,39 @@  dpif_netlink_port_get_pid__(const struct dpif_netlink *dpif,
                             odp_port_t port_no)
     OVS_REQ_RDLOCK(dpif->upcall_lock)
 {
+    struct dpif_channel *min_channel = NULL, *found_channel = NULL;
     uint32_t port_idx = odp_to_u32(port_no);
     uint32_t pid = 0;
+    size_t i;
+
+    /* The ODPP_NONE "reserved" port number uses the "ovs-system"'s
+     * channel, since it is not heavily loaded. */
+    uint32_t idx = port_no == ODPP_NONE ? 0 : port_idx;
 
-    if (dpif->handlers && dpif->uc_array_size > 0) {
-        /* The ODPP_NONE "reserved" port number uses the "ovs-system"'s
-         * channel, since it is not heavily loaded. */
-        uint32_t idx = port_idx >= dpif->uc_array_size ? 0 : port_idx;
-
-        /* Needs to check in case the socket pointer is changed in between
-         * the holding of upcall_lock.  A known case happens when the main
-         * thread deletes the vport while the handler thread is handling
-         * the upcall from that port. */
-        if (dpif->channels[idx].sock) {
-            pid = nl_sock_pid(dpif->channels[idx].sock);
+    if (!id_pool_has_id(dpif->port_ids, port_idx)) {
+        return 0;
+    }
+
+    if (dpif->handlers) {
+        for (i = 0; i < dpif->n_handlers; i++) {
+            if (dpif_handler_channel_find(&dpif->handlers[i], idx)) {
+                found_channel = dpif_handler_channel_find(&dpif->handlers[i],
+                                                          idx);
+                break;
+            }
+
+            if (dpif_handler_channel_find(&dpif->handlers[i], 0)) {
+                min_channel = dpif_handler_channel_find(&dpif->handlers[i], 0);
+            }
         }
     }
 
+    if (found_channel) {
+        pid = nl_sock_pid(found_channel->sock);
+    } else if (min_channel) {
+        pid = nl_sock_pid(min_channel->sock);
+    }
+
     return pid;
 }
 
@@ -2342,12 +2380,14 @@  dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops,
 static void
 dpif_netlink_handler_uninit(struct dpif_handler *handler)
 {
+    hmap_destroy(&handler->channels);
     vport_delete_sock_pool(handler);
 }
 
 static int
 dpif_netlink_handler_init(struct dpif_handler *handler)
 {
+    hmap_init(&handler->channels);
     return vport_create_sock_pool(handler);
 }
 #else
@@ -2355,6 +2395,7 @@  dpif_netlink_handler_init(struct dpif_handler *handler)
 static int
 dpif_netlink_handler_init(struct dpif_handler *handler)
 {
+    hmap_init(&handler->channels);
     handler->epoll_fd = epoll_create(10);
     return handler->epoll_fd < 0 ? errno : 0;
 }
@@ -2362,6 +2403,7 @@  dpif_netlink_handler_init(struct dpif_handler *handler)
 static void
 dpif_netlink_handler_uninit(struct dpif_handler *handler)
 {
+    hmap_destroy(&handler->channels);
     close(handler->epoll_fd);
 }
 #endif
@@ -2374,12 +2416,11 @@  static int
 dpif_netlink_refresh_channels(struct dpif_netlink *dpif, uint32_t n_handlers)
     OVS_REQ_WRLOCK(dpif->upcall_lock)
 {
-    unsigned long int *keep_channels;
     struct dpif_netlink_vport vport;
-    size_t keep_channels_nbits;
     struct nl_dump dump;
     uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
     struct ofpbuf buf;
+    struct dpif_channel *channel;
     int retval = 0;
     size_t i;
 
@@ -2412,13 +2453,9 @@  dpif_netlink_refresh_channels(struct dpif_netlink *dpif, uint32_t n_handlers)
 
     for (i = 0; i < n_handlers; i++) {
         struct dpif_handler *handler = &dpif->handlers[i];
-
         handler->event_offset = handler->n_events = 0;
     }
 
-    keep_channels_nbits = dpif->uc_array_size;
-    keep_channels = bitmap_allocate(keep_channels_nbits);
-
     ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
     dpif_netlink_port_dump_start__(dpif, &dump);
     while (!dpif_netlink_port_dump_next__(dpif, &dump, &vport, &buf)) {
@@ -2426,8 +2463,7 @@  dpif_netlink_refresh_channels(struct dpif_netlink *dpif, uint32_t n_handlers)
         uint32_t upcall_pid;
         int error;
 
-        if (port_no >= dpif->uc_array_size
-            || !vport_get_pid(dpif, port_no, &upcall_pid)) {
+        if (!vport_get_pid(dpif, port_no, &upcall_pid)) {
             struct nl_sock *sock;
             error = create_nl_sock(dpif, &sock);
 
@@ -2475,25 +2511,17 @@  dpif_netlink_refresh_channels(struct dpif_netlink *dpif, uint32_t n_handlers)
             }
         }
 
-        if (port_no < keep_channels_nbits) {
-            bitmap_set1(keep_channels, port_no);
-        }
         continue;
 
     error:
-        vport_del_channels(dpif, vport.port_no);
+        for (i = 0; i < n_handlers; i++) {
+            channel = dpif_handler_channel_find(&dpif->handlers[i], port_no);
+            vport_del_channel(dpif, channel);
+        }
     }
     nl_dump_done(&dump);
     ofpbuf_uninit(&buf);
 
-    /* Discard any saved channels that we didn't reuse. */
-    for (i = 0; i < keep_channels_nbits; i++) {
-        if (!bitmap_is_set(keep_channels, i)) {
-            vport_del_channels(dpif, u32_to_odp(i));
-        }
-    }
-    free(keep_channels);
-
     return retval;
 }
 
@@ -2714,6 +2742,8 @@  dpif_netlink_recv__(struct dpif_netlink *dpif, uint32_t handler_id,
     OVS_REQ_RDLOCK(dpif->upcall_lock)
 {
     struct dpif_handler *handler;
+    size_t n_channels;
+
     int read_tries = 0;
 
     if (!dpif->handlers || handler_id >= dpif->n_handlers) {
@@ -2721,14 +2751,15 @@  dpif_netlink_recv__(struct dpif_netlink *dpif, uint32_t handler_id,
     }
 
     handler = &dpif->handlers[handler_id];
-    if (handler->event_offset >= handler->n_events) {
+    n_channels = dpif_handler_channels_count(handler);
+    if (handler->event_offset >= handler->n_events && n_channels) {
         int retval;
 
         handler->event_offset = handler->n_events = 0;
 
         do {
             retval = epoll_wait(handler->epoll_fd, handler->epoll_events,
-                                dpif->uc_array_size, 0);
+                                n_channels, 0);
         } while (retval < 0 && errno == EINTR);
 
         if (retval < 0) {
@@ -2741,7 +2772,10 @@  dpif_netlink_recv__(struct dpif_netlink *dpif, uint32_t handler_id,
 
     while (handler->event_offset < handler->n_events) {
         int idx = handler->epoll_events[handler->event_offset].data.u32;
-        struct dpif_channel *ch = &dpif->channels[idx];
+        struct dpif_channel *ch = dpif_handler_channel_find(handler, idx);
+        if (!ch) {
+            return EAGAIN;
+        }
 
         handler->event_offset++;
 
@@ -2845,12 +2879,14 @@  dpif_netlink_recv_purge__(struct dpif_netlink *dpif)
     if (dpif->handlers) {
         size_t i;
 
-        if (!dpif->channels[0].sock) {
-            return;
-        }
-        for (i = 0; i < dpif->uc_array_size; i++ ) {
+        for (i = 0; i < dpif->n_handlers; i++) {
+            struct dpif_handler *handler = &dpif->handlers[i];
+            struct dpif_channel *channel;
+
+            HMAP_FOR_EACH (channel, hmap_node, &handler->channels) {
+                nl_sock_drain(channel->sock);
+            }
 
-            nl_sock_drain(dpif->channels[i].sock);
         }
     }
 }