@@ -53,6 +53,12 @@ Post-v2.15.0
- OVS now reports the datapath capability 'ct_zero_snat', which reflects
whether the SNAT with all-zero IP address is supported.
See ovs-vswitchd.conf.db(5) for details.
+ - Linux datapath:
+ * ovs-vswitchd will configure the kernel module using per-cpu dispatch
+ mode (if available). This changes the way upcalls are delivered to user
+ space in order to resolve a number of issues with per-vport dispatch.
+ * New vswitchd unixctl command `dpif-netlink/dispatch-mode` will return
+ the current dispatch mode for each datapath.
v2.15.0 - 15 Feb 2021
@@ -89,6 +89,8 @@ enum ovs_datapath_cmd {
* set on the datapath port (for OVS_ACTION_ATTR_MISS). Only valid on
* %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should
* not be sent.
+ * @OVS_DP_ATTR_PER_CPU_PIDS: Per-cpu array of PIDs for upcalls when
+ * OVS_DP_F_DISPATCH_UPCALL_PER_CPU feature is set.
* @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the
* datapath. Always present in notifications.
* @OVS_DP_ATTR_MEGAFLOW_STATS: Statistics about mega flow masks usage for the
@@ -105,6 +107,8 @@ enum ovs_datapath_attr {
OVS_DP_ATTR_MEGAFLOW_STATS, /* struct ovs_dp_megaflow_stats */
OVS_DP_ATTR_USER_FEATURES, /* OVS_DP_F_* */
OVS_DP_ATTR_PAD,
+ OVS_DP_ATTR_PAD2,
+ OVS_DP_ATTR_PER_CPU_PIDS, /* Netlink PIDS to receive upcalls */
__OVS_DP_ATTR_MAX
};
@@ -146,6 +150,9 @@ struct ovs_vport_stats {
/* Allow tc offload recirc sharing */
#define OVS_DP_F_TC_RECIRC_SHARING (1 << 2)
+/* Allow per-cpu dispatch of upcalls */
+#define OVS_DP_F_DISPATCH_UPCALL_PER_CPU (1 << 3)
+
/* Fixed logical ports. */
#define OVSP_LOCAL ((__u32)0)
@@ -564,6 +564,7 @@ MAN_FRAGMENTS += \
lib/memory-unixctl.man \
lib/netdev-dpdk-unixctl.man \
lib/dpif-netdev-unixctl.man \
+ lib/dpif-netlink-unixctl.man \
lib/ofp-version.man \
lib/ovs.tmac \
lib/ovs-replay.man \
@@ -8225,6 +8225,7 @@ const struct dpif_class dpif_netdev_class = {
dpif_netdev_operate,
NULL, /* recv_set */
NULL, /* handlers_set */
+ NULL, /* number_handlers_required */
dpif_netdev_set_config,
dpif_netdev_queue_to_priority,
NULL, /* recv */
new file mode 100644
@@ -0,0 +1,6 @@
+.SS "DPIF-NETLINK COMMANDS"
+These commands are used to expose internal information of the "dpif-netlink"
+kernel space datapath.
+.
+.IP "\fBdpif-netlink/dispatch-mode\fR"
+Displays the "dispatch-mode" for all datapaths.
@@ -84,6 +84,9 @@ enum { MAX_PORTS = USHRT_MAX };
#define EPOLLEXCLUSIVE (1u << 28)
#endif
+/* This PID is not used by the kernel datapath when using dispatch per CPU,
+ * but it is required to be set (not zero). */
+#define DPIF_NETLINK_PER_CPU_PID UINT32_MAX
struct dpif_netlink_dp {
/* Generic Netlink header. */
uint8_t cmd;
@@ -98,6 +101,8 @@ struct dpif_netlink_dp {
const struct ovs_dp_stats *stats; /* OVS_DP_ATTR_STATS. */
const struct ovs_dp_megaflow_stats *megaflow_stats;
/* OVS_DP_ATTR_MEGAFLOW_STATS.*/
+ const uint32_t *upcall_pids; /* OVS_DP_ATTR_PER_CPU_PIDS */
+ uint32_t n_upcall_pids;
};
static void dpif_netlink_dp_init(struct dpif_netlink_dp *);
@@ -113,6 +118,10 @@ static int dpif_netlink_dp_get(const struct dpif *,
static int
dpif_netlink_set_features(struct dpif *dpif_, uint32_t new_features);
+static void
+dpif_netlink_unixctl_dispatch_mode(struct unixctl_conn *conn, int argc,
+ const char *argv[], void *aux);
+
struct dpif_netlink_flow {
/* Generic Netlink header. */
uint8_t cmd;
@@ -178,11 +187,16 @@ struct dpif_windows_vport_sock {
#endif
struct dpif_handler {
+ /* per-vport dispatch mode */
struct epoll_event *epoll_events;
int epoll_fd; /* epoll fd that includes channel socks. */
int n_events; /* Num events returned by epoll_wait(). */
int event_offset; /* Offset into 'epoll_events'. */
+ /* per-cpu dispatch mode */
+ struct nl_sock *sock; /* Each handler thread holds one netlink
+ socket. */
+
#ifdef _WIN32
/* Pool of sockets. */
struct dpif_windows_vport_sock *vport_sock_pool;
@@ -201,6 +215,8 @@ struct dpif_netlink {
struct fat_rwlock upcall_lock;
struct dpif_handler *handlers;
uint32_t n_handlers; /* Num of upcall handlers. */
+
+ /* Per-vport dispatch mode */
struct dpif_channel *channels; /* Array of channels for each port. */
int uc_array_size; /* Size of 'handler->channels' and */
/* 'handler->epoll_events'. */
@@ -241,8 +257,12 @@ static int open_dpif(const struct dpif_netlink_dp *, struct dpif **);
static uint32_t dpif_netlink_port_get_pid(const struct dpif *,
odp_port_t port_no);
static void dpif_netlink_handler_uninit(struct dpif_handler *handler);
-static int dpif_netlink_refresh_channels(struct dpif_netlink *,
- uint32_t n_handlers);
+static int dpif_netlink_refresh_handlers_vport_dispatch(struct dpif_netlink *,
+ uint32_t n_handlers);
+static void destroy_all_channels(struct dpif_netlink *);
+static int dpif_netlink_refresh_handlers_cpu_dispatch(struct dpif_netlink *);
+static void destroy_all_handlers(struct dpif_netlink *);
+
static void dpif_netlink_vport_to_ofpbuf(const struct dpif_netlink_vport *,
struct ofpbuf *);
static int dpif_netlink_vport_from_ofpbuf(struct dpif_netlink_vport *,
@@ -294,6 +314,11 @@ dpif_netlink_cast(const struct dpif *dpif)
return CONTAINER_OF(dpif, struct dpif_netlink, dpif);
}
+static inline bool
+dpif_netlink_upcall_per_cpu(const struct dpif_netlink *dpif) {
+ return !!((dpif)->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU);
+}
+
static int
dpif_netlink_enumerate(struct sset *all_dps,
const struct dpif_class *dpif_class OVS_UNUSED)
@@ -357,9 +382,30 @@ dpif_netlink_open(const struct dpif_class *class OVS_UNUSED, const char *name,
dp_request.cmd = OVS_DP_CMD_SET;
}
+ /* The Open vSwitch kernel module has two modes for dispatching upcalls:
+ * per-vport and per-cpu.
+ *
+ * When dispatching upcalls per-vport, the kernel will
+ * send the upcall via a Netlink socket that has been selected based on the
+ * vport that received the packet that is causing the upcall.
+ *
+ * When dispatching upcall per-cpu, the kernel will send the upcall via
+ * a Netlink socket that has been selected based on the cpu that received
+ * the packet that is causing the upcall.
+ *
+ * First we test to see if the kernel module supports per-cpu dispatching
+ * (the preferred method). If it does not support per-cpu dispatching, we
+ * fall back to the per-vport dispatch mode.
+ */
dp_request.user_features |= OVS_DP_F_UNALIGNED;
- dp_request.user_features |= OVS_DP_F_VPORT_PIDS;
+ dp_request.user_features &= ~OVS_DP_F_VPORT_PIDS;
+ dp_request.user_features |= OVS_DP_F_DISPATCH_UPCALL_PER_CPU;
error = dpif_netlink_dp_transact(&dp_request, &dp, &buf);
+ if (error) {
+ dp_request.user_features &= ~OVS_DP_F_DISPATCH_UPCALL_PER_CPU;
+ dp_request.user_features |= OVS_DP_F_VPORT_PIDS;
+ error = dpif_netlink_dp_transact(&dp_request, &dp, &buf);
+ }
if (error) {
return error;
}
@@ -368,6 +414,12 @@ dpif_netlink_open(const struct dpif_class *class OVS_UNUSED, const char *name,
dpif_netlink_set_features(*dpifp, OVS_DP_F_TC_RECIRC_SHARING);
ofpbuf_delete(buf);
+ if (create) {
+ VLOG_INFO("Datapath dispatch mode: %s",
+ dpif_netlink_upcall_per_cpu(dpif_netlink_cast(*dpifp)) ?
+ "per-cpu" : "per-vport");
+ }
+
return error;
}
@@ -609,6 +661,24 @@ destroy_all_channels(struct dpif_netlink *dpif)
dpif->uc_array_size = 0;
}
+static void
+destroy_all_handlers(struct dpif_netlink *dpif)
+ OVS_REQ_WRLOCK(dpif->upcall_lock)
+{
+ int i = 0;
+
+ if (!dpif->handlers) {
+ return;
+ }
+ for (i = 0; i < dpif->n_handlers; i++) {
+ struct dpif_handler *handler = &dpif->handlers[i];
+ close_nl_sock(handler->sock);
+ }
+ free(dpif->handlers);
+ dpif->handlers = NULL;
+ dpif->n_handlers = 0;
+}
+
static void
dpif_netlink_close(struct dpif *dpif_)
{
@@ -617,7 +687,11 @@ dpif_netlink_close(struct dpif *dpif_)
nl_sock_destroy(dpif->port_notifier);
fat_rwlock_wrlock(&dpif->upcall_lock);
- destroy_all_channels(dpif);
+ if (dpif_netlink_upcall_per_cpu(dpif)) {
+ destroy_all_handlers(dpif);
+ } else {
+ destroy_all_channels(dpif);
+ }
fat_rwlock_unlock(&dpif->upcall_lock);
fat_rwlock_destroy(&dpif->upcall_lock);
@@ -641,11 +715,14 @@ dpif_netlink_run(struct dpif *dpif_)
{
struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
- if (dpif->refresh_channels) {
- dpif->refresh_channels = false;
- fat_rwlock_wrlock(&dpif->upcall_lock);
- dpif_netlink_refresh_channels(dpif, dpif->n_handlers);
- fat_rwlock_unlock(&dpif->upcall_lock);
+ if (!dpif_netlink_upcall_per_cpu(dpif)) {
+ if (dpif->refresh_channels) {
+ dpif->refresh_channels = false;
+ fat_rwlock_wrlock(&dpif->upcall_lock);
+ dpif_netlink_refresh_handlers_vport_dispatch(dpif,
+ dpif->n_handlers);
+ fat_rwlock_unlock(&dpif->upcall_lock);
+ }
}
return false;
}
@@ -681,6 +758,41 @@ dpif_netlink_get_stats(const struct dpif *dpif_, struct dpif_dp_stats *stats)
return error;
}
+static int
+dpif_netlink_set_handler_pids(struct dpif *dpif_, const uint32_t *upcall_pids,
+ uint32_t n_upcall_pids)
+{
+ struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
+ struct dpif_netlink_dp request, reply;
+ struct ofpbuf *bufp;
+ int error;
+ int n_cores;
+
+ n_cores = count_cpu_cores();
+ ovs_assert(n_cores == n_upcall_pids);
+ VLOG_DBG("Dispatch mode(per-cpu): Number of CPUs is %d", n_cores);
+
+ dpif_netlink_dp_init(&request);
+ request.cmd = OVS_DP_CMD_SET;
+ request.name = dpif_->base_name;
+ request.dp_ifindex = dpif->dp_ifindex;
+ request.user_features = dpif->user_features |
+ OVS_DP_F_DISPATCH_UPCALL_PER_CPU;
+
+ request.upcall_pids = upcall_pids;
+ request.n_upcall_pids = n_cores;
+
+ error = dpif_netlink_dp_transact(&request, &reply, &bufp);
+ if (!error) {
+ dpif->user_features = reply.user_features;
+ ofpbuf_delete(bufp);
+ if (!dpif_netlink_upcall_per_cpu(dpif)) {
+            return EOPNOTSUPP;
+ }
+ }
+ return error;
+}
+
static int
dpif_netlink_set_features(struct dpif *dpif_, uint32_t new_features)
{
@@ -741,7 +853,7 @@ get_vport_type(const struct dpif_netlink_vport *vport)
return "erspan";
case OVS_VPORT_TYPE_IP6ERSPAN:
- return "ip6erspan";
+ return "ip6erspan";
case OVS_VPORT_TYPE_IP6GRE:
return "ip6gre";
@@ -807,10 +919,16 @@ dpif_netlink_port_add__(struct dpif_netlink *dpif, const char *name,
uint32_t upcall_pids = 0;
int error = 0;
- if (dpif->handlers) {
- error = create_nl_sock(dpif, &sock);
- if (error) {
- return error;
+ /* per-cpu dispatch mode does not require a socket per vport */
+ if (!dpif_netlink_upcall_per_cpu(dpif)) {
+ if (dpif->handlers) {
+ error = create_nl_sock(dpif, &sock);
+ if (error) {
+ return error;
+ }
+ }
+ if (sock) {
+ upcall_pids = nl_sock_pid(sock);
}
}
@@ -821,9 +939,6 @@ dpif_netlink_port_add__(struct dpif_netlink *dpif, const char *name,
request.name = name;
request.port_no = *port_nop;
- if (sock) {
- upcall_pids = nl_sock_pid(sock);
- }
request.n_upcall_pids = 1;
request.upcall_pids = &upcall_pids;
@@ -845,19 +960,21 @@ dpif_netlink_port_add__(struct dpif_netlink *dpif, const char *name,
goto exit;
}
- error = vport_add_channel(dpif, *port_nop, sock);
- if (error) {
- VLOG_INFO("%s: could not add channel for port %s",
- dpif_name(&dpif->dpif), name);
-
- /* Delete the port. */
- dpif_netlink_vport_init(&request);
- request.cmd = OVS_VPORT_CMD_DEL;
- request.dp_ifindex = dpif->dp_ifindex;
- request.port_no = *port_nop;
- dpif_netlink_vport_transact(&request, NULL, NULL);
- close_nl_sock(sock);
- goto exit;
+ if (!dpif_netlink_upcall_per_cpu(dpif)) {
+ error = vport_add_channel(dpif, *port_nop, sock);
+ if (error) {
+ VLOG_INFO("%s: could not add channel for port %s",
+ dpif_name(&dpif->dpif), name);
+
+ /* Delete the port. */
+ dpif_netlink_vport_init(&request);
+ request.cmd = OVS_VPORT_CMD_DEL;
+ request.dp_ifindex = dpif->dp_ifindex;
+ request.port_no = *port_nop;
+ dpif_netlink_vport_transact(&request, NULL, NULL);
+ close_nl_sock(sock);
+ goto exit;
+ }
}
exit:
@@ -1115,6 +1232,16 @@ dpif_netlink_port_get_pid(const struct dpif *dpif_, odp_port_t port_no)
const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
uint32_t ret;
+ /* In per-cpu dispatch mode, vports do not have an associated PID */
+ if (dpif_netlink_upcall_per_cpu(dpif)) {
+ /* In per-cpu dispatch mode, this will be ignored as kernel space will
+     * select the PID before sending to user space. We set it to
+ * DPIF_NETLINK_PER_CPU_PID as 0 is rejected by kernel space as an
+ * invalid PID.
+ */
+ return DPIF_NETLINK_PER_CPU_PID;
+ }
+
fat_rwlock_rdlock(&dpif->upcall_lock);
ret = dpif_netlink_port_get_pid__(dpif, port_no);
fat_rwlock_unlock(&dpif->upcall_lock);
@@ -2326,12 +2453,51 @@ dpif_netlink_handler_uninit(struct dpif_handler *handler)
}
#endif
+static int
+dpif_netlink_refresh_handlers_cpu_dispatch(struct dpif_netlink *dpif)
+ OVS_REQ_WRLOCK(dpif->upcall_lock)
+{
+ int handler_id;
+ int error = 0;
+ uint32_t n_handlers;
+ uint32_t *upcall_pids;
+
+ n_handlers = count_cpu_cores();
+ if (dpif->n_handlers != n_handlers) {
+ VLOG_DBG("Dispatch mode(per-cpu): initializing %d handlers",
+ n_handlers);
+ destroy_all_handlers(dpif);
+ upcall_pids = xzalloc(n_handlers * sizeof *upcall_pids);
+ dpif->handlers = xzalloc(n_handlers * sizeof *dpif->handlers);
+ for (handler_id = 0; handler_id < n_handlers; handler_id++) {
+ struct dpif_handler *handler = &dpif->handlers[handler_id];
+ error = create_nl_sock(dpif, &handler->sock);
+ if (error) {
+                VLOG_ERR("Dispatch mode(per-cpu): Cannot create socket for "
+ "handler %d", handler_id);
+ continue;
+ }
+ upcall_pids[handler_id] = nl_sock_pid(handler->sock);
+ VLOG_DBG("Dispatch mode(per-cpu): "
+ "handler %d has Netlink PID of %u",
+ handler_id, upcall_pids[handler_id]);
+ }
+
+ dpif->n_handlers = n_handlers;
+ error = dpif_netlink_set_handler_pids(&dpif->dpif, upcall_pids,
+ n_handlers);
+ free(upcall_pids);
+ }
+ return error;
+}
+
/* Synchronizes 'channels' in 'dpif->handlers' with the set of vports
* currently in 'dpif' in the kernel, by adding a new set of channels for
* any kernel vport that lacks one and deleting any channels that have no
* backing kernel vports. */
static int
-dpif_netlink_refresh_channels(struct dpif_netlink *dpif, uint32_t n_handlers)
+dpif_netlink_refresh_handlers_vport_dispatch(struct dpif_netlink *dpif,
+ uint32_t n_handlers)
OVS_REQ_WRLOCK(dpif->upcall_lock)
{
unsigned long int *keep_channels;
@@ -2458,7 +2624,7 @@ dpif_netlink_refresh_channels(struct dpif_netlink *dpif, uint32_t n_handlers)
}
static int
-dpif_netlink_recv_set__(struct dpif_netlink *dpif, bool enable)
+dpif_netlink_recv_set_vport_dispatch(struct dpif_netlink *dpif, bool enable)
OVS_REQ_WRLOCK(dpif->upcall_lock)
{
if ((dpif->handlers != NULL) == enable) {
@@ -2467,7 +2633,21 @@ dpif_netlink_recv_set__(struct dpif_netlink *dpif, bool enable)
destroy_all_channels(dpif);
return 0;
} else {
- return dpif_netlink_refresh_channels(dpif, 1);
+ return dpif_netlink_refresh_handlers_vport_dispatch(dpif, 1);
+ }
+}
+
+static int
+dpif_netlink_recv_set_cpu_dispatch(struct dpif_netlink *dpif, bool enable)
+ OVS_REQ_WRLOCK(dpif->upcall_lock)
+{
+ if ((dpif->handlers != NULL) == enable) {
+ return 0;
+ } else if (!enable) {
+ destroy_all_handlers(dpif);
+ return 0;
+ } else {
+ return dpif_netlink_refresh_handlers_cpu_dispatch(dpif);
}
}
@@ -2478,7 +2658,11 @@ dpif_netlink_recv_set(struct dpif *dpif_, bool enable)
int error;
fat_rwlock_wrlock(&dpif->upcall_lock);
- error = dpif_netlink_recv_set__(dpif, enable);
+ if (dpif_netlink_upcall_per_cpu(dpif)) {
+ error = dpif_netlink_recv_set_cpu_dispatch(dpif, enable);
+ } else {
+ error = dpif_netlink_recv_set_vport_dispatch(dpif, enable);
+ }
fat_rwlock_unlock(&dpif->upcall_lock);
return error;
@@ -2500,13 +2684,31 @@ dpif_netlink_handlers_set(struct dpif *dpif_, uint32_t n_handlers)
fat_rwlock_wrlock(&dpif->upcall_lock);
if (dpif->handlers) {
- error = dpif_netlink_refresh_channels(dpif, n_handlers);
+ if (dpif_netlink_upcall_per_cpu(dpif)) {
+ error = dpif_netlink_refresh_handlers_cpu_dispatch(dpif);
+ } else {
+ error = dpif_netlink_refresh_handlers_vport_dispatch(dpif,
+ n_handlers);
+ }
}
fat_rwlock_unlock(&dpif->upcall_lock);
return error;
}
+static bool
+dpif_netlink_number_handlers_required(struct dpif *dpif_, uint32_t *n_handlers)
+{
+ struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
+
+ if (dpif_netlink_upcall_per_cpu(dpif)) {
+ *n_handlers = count_cpu_cores();
+ return true;
+ }
+
+ return false;
+}
+
static int
dpif_netlink_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
uint32_t queue_id, uint32_t *priority)
@@ -2669,8 +2871,59 @@ dpif_netlink_recv_windows(struct dpif_netlink *dpif, uint32_t handler_id,
}
#else
static int
-dpif_netlink_recv__(struct dpif_netlink *dpif, uint32_t handler_id,
- struct dpif_upcall *upcall, struct ofpbuf *buf)
+dpif_netlink_recv_cpu_dispatch(struct dpif_netlink *dpif, uint32_t handler_id,
+ struct dpif_upcall *upcall, struct ofpbuf *buf)
+ OVS_REQ_RDLOCK(dpif->upcall_lock)
+{
+ struct dpif_handler *handler;
+ int read_tries = 0;
+
+ if (!dpif->handlers || handler_id >= dpif->n_handlers) {
+ return EAGAIN;
+ }
+
+ handler = &dpif->handlers[handler_id];
+
+ for (;;) {
+ int dp_ifindex;
+ int error;
+
+ if (++read_tries > 50) {
+ return EAGAIN;
+ }
+ error = nl_sock_recv(handler->sock, buf, NULL, false);
+ if (error == ENOBUFS) {
+ /* ENOBUFS typically means that we've received so many
+ * packets that the buffer overflowed. Try again
+ * immediately because there's almost certainly a packet
+ * waiting for us. */
+ report_loss(dpif, NULL, 0, handler_id);
+ continue;
+ }
+
+ if (error) {
+ if (error == EAGAIN) {
+ break;
+ }
+ return error;
+ }
+
+ error = parse_odp_packet(buf, upcall, &dp_ifindex);
+ if (!error && dp_ifindex == dpif->dp_ifindex) {
+ return 0;
+ } else if (error) {
+ return error;
+ }
+ }
+
+ return EAGAIN;
+}
+
+static int
+dpif_netlink_recv_vport_dispatch(struct dpif_netlink *dpif,
+ uint32_t handler_id,
+ struct dpif_upcall *upcall,
+ struct ofpbuf *buf)
OVS_REQ_RDLOCK(dpif->upcall_lock)
{
struct dpif_handler *handler;
@@ -2755,18 +3008,24 @@ dpif_netlink_recv(struct dpif *dpif_, uint32_t handler_id,
#ifdef _WIN32
error = dpif_netlink_recv_windows(dpif, handler_id, upcall, buf);
#else
- error = dpif_netlink_recv__(dpif, handler_id, upcall, buf);
+ if (dpif_netlink_upcall_per_cpu(dpif)) {
+ error = dpif_netlink_recv_cpu_dispatch(dpif, handler_id, upcall, buf);
+ } else {
+ error = dpif_netlink_recv_vport_dispatch(dpif,
+ handler_id, upcall, buf);
+ }
#endif
fat_rwlock_unlock(&dpif->upcall_lock);
return error;
}
+#ifdef _WIN32
static void
-dpif_netlink_recv_wait__(struct dpif_netlink *dpif, uint32_t handler_id)
+dpif_netlink_recv_wait_windows(struct dpif_netlink *dpif, uint32_t handler_id)
OVS_REQ_RDLOCK(dpif->upcall_lock)
{
-#ifdef _WIN32
+
uint32_t i;
struct dpif_windows_vport_sock *sock_pool =
dpif->handlers[handler_id].vport_sock_pool;
@@ -2779,27 +3038,54 @@ dpif_netlink_recv_wait__(struct dpif_netlink *dpif, uint32_t handler_id)
for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
nl_sock_wait(sock_pool[i].nl_sock, POLLIN);
}
+}
#else
+
+static void
+dpif_netlink_recv_wait_vport_dispatch(struct dpif_netlink *dpif,
+ uint32_t handler_id)
+ OVS_REQ_RDLOCK(dpif->upcall_lock)
+{
if (dpif->handlers && handler_id < dpif->n_handlers) {
struct dpif_handler *handler = &dpif->handlers[handler_id];
poll_fd_wait(handler->epoll_fd, POLLIN);
}
-#endif
}
+static void
+dpif_netlink_recv_wait_cpu_dispatch(struct dpif_netlink *dpif,
+ uint32_t handler_id)
+ OVS_REQ_RDLOCK(dpif->upcall_lock)
+{
+ if (dpif->handlers && handler_id < dpif->n_handlers) {
+ struct dpif_handler *handler = &dpif->handlers[handler_id];
+
+ poll_fd_wait(nl_sock_fd(handler->sock), POLLIN);
+ }
+}
+#endif
+
static void
dpif_netlink_recv_wait(struct dpif *dpif_, uint32_t handler_id)
{
struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
fat_rwlock_rdlock(&dpif->upcall_lock);
- dpif_netlink_recv_wait__(dpif, handler_id);
+#ifdef _WIN32
+ dpif_netlink_recv_wait_windows(dpif, handler_id);
+#else
+ if (dpif_netlink_upcall_per_cpu(dpif)) {
+ dpif_netlink_recv_wait_cpu_dispatch(dpif, handler_id);
+ } else {
+ dpif_netlink_recv_wait_vport_dispatch(dpif, handler_id);
+ }
+#endif
fat_rwlock_unlock(&dpif->upcall_lock);
}
static void
-dpif_netlink_recv_purge__(struct dpif_netlink *dpif)
+dpif_netlink_recv_purge_vport_dispatch(struct dpif_netlink *dpif)
OVS_REQ_WRLOCK(dpif->upcall_lock)
{
if (dpif->handlers) {
@@ -2815,13 +3101,31 @@ dpif_netlink_recv_purge__(struct dpif_netlink *dpif)
}
}
+static void
+dpif_netlink_recv_purge_cpu_dispatch(struct dpif_netlink *dpif)
+ OVS_REQ_WRLOCK(dpif->upcall_lock)
+{
+ int handler_id;
+
+ if (dpif->handlers) {
+ for (handler_id = 0; handler_id < dpif->n_handlers; handler_id++) {
+ struct dpif_handler *handler = &dpif->handlers[handler_id];
+ nl_sock_drain(handler->sock);
+ }
+ }
+}
+
static void
dpif_netlink_recv_purge(struct dpif *dpif_)
{
struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
fat_rwlock_wrlock(&dpif->upcall_lock);
- dpif_netlink_recv_purge__(dpif);
+ if (dpif_netlink_upcall_per_cpu(dpif)) {
+ dpif_netlink_recv_purge_cpu_dispatch(dpif);
+ } else {
+ dpif_netlink_recv_purge_vport_dispatch(dpif);
+ }
fat_rwlock_unlock(&dpif->upcall_lock);
}
@@ -3992,6 +4296,7 @@ const struct dpif_class dpif_netlink_class = {
dpif_netlink_operate,
dpif_netlink_recv_set,
dpif_netlink_handlers_set,
+ dpif_netlink_number_handlers_required,
NULL, /* set_config */
dpif_netlink_queue_to_priority,
dpif_netlink_recv,
@@ -4080,6 +4385,9 @@ dpif_netlink_init(void)
ovs_tunnels_out_of_tree = dpif_netlink_rtnl_probe_oot_tunnels();
+ unixctl_command_register("dpif-netlink/dispatch-mode", "", 0, 0,
+ dpif_netlink_unixctl_dispatch_mode, NULL);
+
ovsthread_once_done(&once);
}
@@ -4356,6 +4664,11 @@ dpif_netlink_dp_to_ofpbuf(const struct dpif_netlink_dp *dp, struct ofpbuf *buf)
nl_msg_put_u32(buf, OVS_DP_ATTR_USER_FEATURES, dp->user_features);
}
+ if (dp->upcall_pids) {
+ nl_msg_put_unspec(buf, OVS_DP_ATTR_PER_CPU_PIDS, dp->upcall_pids,
+ sizeof *dp->upcall_pids * dp->n_upcall_pids);
+ }
+
/* Skip OVS_DP_ATTR_STATS since we never have a reason to serialize it. */
}
@@ -4675,13 +4988,58 @@ report_loss(struct dpif_netlink *dpif, struct dpif_channel *ch, uint32_t ch_idx,
return;
}
- ds_init(&s);
- if (ch->last_poll != LLONG_MIN) {
- ds_put_format(&s, " (last polled %lld ms ago)",
- time_msec() - ch->last_poll);
+ if (dpif_netlink_upcall_per_cpu(dpif)) {
+ VLOG_WARN("%s: lost packet on handler %u",
+ dpif_name(&dpif->dpif), handler_id);
+ } else {
+ ds_init(&s);
+ if (ch->last_poll != LLONG_MIN) {
+ ds_put_format(&s, " (last polled %lld ms ago)",
+ time_msec() - ch->last_poll);
+ }
+
+ VLOG_WARN("%s: lost packet on port channel %u of handler %u%s",
+ dpif_name(&dpif->dpif), ch_idx, handler_id, ds_cstr(&s));
+ ds_destroy(&s);
+ }
+}
+
+static void
+dpif_netlink_unixctl_dispatch_mode(struct unixctl_conn *conn,
+ int argc OVS_UNUSED,
+ const char *argv[] OVS_UNUSED,
+ void *aux OVS_UNUSED)
+{
+ struct ds reply = DS_EMPTY_INITIALIZER;
+ struct nl_dump dump;
+ uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
+ struct ofpbuf msg, buf;
+ int error;
+
+ error = dpif_netlink_init();
+ if (error) {
+ return;
+ }
+
+ ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
+ dpif_netlink_dp_dump_start(&dump);
+ while (nl_dump_next(&dump, &msg, &buf)) {
+ struct dpif_netlink_dp dp;
+ if (!dpif_netlink_dp_from_ofpbuf(&dp, &msg)) {
+ ds_put_format(&reply, "%s: ", dp.name);
+ if (dp.user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU) {
+ ds_put_format(&reply, "per-cpu dispatch mode");
+ } else {
+ ds_put_format(&reply, "per-vport dispatch mode");
+ }
+ ds_put_format(&reply, "\n");
+ }
+ }
+ ofpbuf_uninit(&buf);
+ error = nl_dump_done(&dump);
+ if (!error) {
+ unixctl_command_reply(conn, ds_cstr(&reply));
}
- VLOG_WARN("%s: lost packet on port channel %u of handler %u%s",
- dpif_name(&dpif->dpif), ch_idx, handler_id, ds_cstr(&s));
- ds_destroy(&s);
+ ds_destroy(&reply);
}
@@ -337,26 +337,28 @@ struct dpif_class {
* updating flows as necessary if it does this. */
int (*recv_set)(struct dpif *dpif, bool enable);
- /* Refreshes the poll loops and Netlink sockets associated to each port,
- * when the number of upcall handlers (upcall receiving thread) is changed
- * to 'n_handlers' and receiving packets for 'dpif' is enabled by
+ /* Attempts to refresh the poll loops and Netlink sockets used for handling
+ * upcalls when the number of upcall handlers (upcall receiving thread) is
+ * changed to 'n_handlers' and receiving packets for 'dpif' is enabled by
* recv_set().
*
- * Since multiple upcall handlers can read upcalls simultaneously from
- * 'dpif', each port can have multiple Netlink sockets, one per upcall
- * handler. So, handlers_set() is responsible for the following tasks:
+ * A dpif implementation may choose to ignore 'n_handlers' while returning
+ * success.
*
- * When receiving upcall is enabled, extends or creates the
- * configuration to support:
- *
- * - 'n_handlers' Netlink sockets for each port.
+ * The method for distribution of upcalls between handler threads is
+ * specific to the dpif implementation.
+ */
+ int (*handlers_set)(struct dpif *dpif, uint32_t n_handlers);
+
+ /* Queries 'dpif' to see if a certain number of handlers are required by
+ * the implementation.
*
- * - 'n_handlers' poll loops, one for each upcall handler.
+ * If a certain number of handlers are required, returns 'true' and sets
+ * 'n_handlers' to that number of handler threads.
*
- * - registering the Netlink sockets for the same upcall handler to
- * the corresponding poll loop.
- * */
- int (*handlers_set)(struct dpif *dpif, uint32_t n_handlers);
+ * If not, returns 'false'.
+ */
+ bool (*number_handlers_required)(struct dpif *dpif, uint32_t *n_handlers);
/* Pass custom configuration options to the datapath. The implementation
* might postpone applying the changes until run() is called. */
@@ -1489,6 +1489,23 @@ dpif_handlers_set(struct dpif *dpif, uint32_t n_handlers)
return error;
}
+/* Checks if a certain number of handlers are required.
+ *
+ * If a certain number of handlers are required, returns 'true' and sets
+ * 'n_handlers' to that number of handler threads.
+ *
+ * If not, returns 'false'.
+ */
+bool
+dpif_number_handlers_required(struct dpif *dpif, uint32_t *n_handlers) {
+ bool ret = false;
+
+ if (dpif->dpif_class->number_handlers_required) {
+ ret = dpif->dpif_class->number_handlers_required(dpif, n_handlers);
+ }
+ return ret;
+}
+
void
dpif_register_dp_purge_cb(struct dpif *dpif, dp_purge_callback *cb, void *aux)
{
@@ -873,6 +873,7 @@ void dpif_register_upcall_cb(struct dpif *, upcall_callback *, void *aux);
int dpif_recv_set(struct dpif *, bool enable);
int dpif_handlers_set(struct dpif *, uint32_t n_handlers);
+bool dpif_number_handlers_required(struct dpif *, uint32_t *n_handlers);
int dpif_set_config(struct dpif *, const struct smap *cfg);
int dpif_port_set_config(struct dpif *, odp_port_t, const struct smap *cfg);
int dpif_recv(struct dpif *, uint32_t handler_id, struct dpif_upcall *,
@@ -636,24 +636,61 @@ udpif_set_threads(struct udpif *udpif, uint32_t n_handlers_,
uint32_t n_revalidators_)
{
ovs_assert(udpif);
- ovs_assert(n_handlers_ && n_revalidators_);
+ uint32_t n_handlers_requested;
+ uint32_t n_revalidators_requested;
+ bool forced = false;
+
+ if (dpif_number_handlers_required(udpif->dpif, &n_handlers_requested)) {
+ forced = true;
+ if (!n_revalidators_) {
+ n_revalidators_requested = n_handlers_requested / 4 + 1;
+ } else {
+ n_revalidators_requested = n_revalidators_;
+ }
+ } else {
+ int threads = MAX(count_cpu_cores(), 2);
+
+ n_revalidators_requested = MAX(n_revalidators_, 0);
+ n_handlers_requested = MAX(n_handlers_, 0);
- if (udpif->n_handlers != n_handlers_
- || udpif->n_revalidators != n_revalidators_) {
+ if (!n_revalidators_requested) {
+ n_revalidators_requested = n_handlers_requested
+ ? MAX(threads - (int) n_handlers_requested, 1)
+ : threads / 4 + 1;
+ }
+
+ if (!n_handlers_requested) {
+ n_handlers_requested = MAX(threads -
+ (int) n_revalidators_requested, 1);
+ }
+ }
+
+ if (udpif->n_handlers != n_handlers_requested
+ || udpif->n_revalidators != n_revalidators_requested) {
+ if (forced) {
+ VLOG_INFO("Overriding n-handler-threads to %u, setting "
+ "n-revalidator-threads to %u", n_handlers_requested,
+ n_revalidators_requested);
+ } else {
+ VLOG_INFO("Setting n-handler-threads to %u, setting "
+ "n-revalidator-threads to %u", n_handlers_requested,
+ n_revalidators_requested);
+ }
udpif_stop_threads(udpif, true);
}
if (!udpif->handlers && !udpif->revalidators) {
+ VLOG_INFO("Starting %u threads", n_handlers_requested +
+ n_revalidators_requested);
int error;
-
- error = dpif_handlers_set(udpif->dpif, n_handlers_);
+ error = dpif_handlers_set(udpif->dpif, n_handlers_requested);
if (error) {
VLOG_ERR("failed to configure handlers in dpif %s: %s",
dpif_name(udpif->dpif), ovs_strerror(error));
return;
}
-
- udpif_start_threads(udpif, n_handlers_, n_revalidators_);
+ udpif_start_threads(udpif, n_handlers_requested,
+ n_revalidators_requested);
}
}
@@ -792,20 +792,8 @@ ofproto_type_set_config(const char *datapath_type, const struct smap *cfg)
void
ofproto_set_threads(int n_handlers_, int n_revalidators_)
{
- int threads = MAX(count_cpu_cores(), 2);
-
n_revalidators = MAX(n_revalidators_, 0);
n_handlers = MAX(n_handlers_, 0);
-
- if (!n_revalidators) {
- n_revalidators = n_handlers
- ? MAX(threads - (int) n_handlers, 1)
- : threads / 4 + 1;
- }
-
- if (!n_handlers) {
- n_handlers = MAX(threads - (int) n_revalidators, 1);
- }
}
void
@@ -274,6 +274,7 @@ type).
.
.so lib/dpdk-unixctl.man
.so lib/dpif-netdev-unixctl.man
+.so lib/dpif-netlink-unixctl.man
.so lib/netdev-dpdk-unixctl.man
.so ofproto/ofproto-dpif-unixctl.man
.so ofproto/ofproto-unixctl.man
@@ -544,9 +544,9 @@
<column name="other_config" key="n-handler-threads"
type='{"type": "integer", "minInteger": 1}'>
<p>
- Specifies the number of threads for software datapaths to use for
- handling new flows. The default the number of online CPU cores minus
- the number of revalidators.
+ Attempts to specify the number of threads for software datapaths to
+ use for handling new flows. Some datapaths may choose to ignore
+ this and it will be set to a sensible option for the datapath type.
</p>
<p>
This configuration is per datapath. If you have more than one
@@ -560,12 +560,17 @@
<column name="other_config" key="n-revalidator-threads"
type='{"type": "integer", "minInteger": 1}'>
<p>
- Specifies the number of threads for software datapaths to use for
- revalidating flows in the datapath. Typically, there is a direct
- correlation between the number of revalidator threads, and the number
- of flows allowed in the datapath. The default is the number of cpu
- cores divided by four plus one. If <code>n-handler-threads</code> is
- set, the default changes to the number of cpu cores minus the number
+ Attempts to specify the number of threads for software datapaths to
+ use for revalidating flows in the datapath. Some datapaths may
+        choose to ignore this and will set it to a sensible option for the
+ datapath type.
+ </p>
+ <p>
+ Typically, there is a direct correlation between the
+ number of revalidator threads, and the number of flows allowed in the
+ datapath. The default is the number of cpu cores divided by four
+ plus one. If <code>n-handler-threads</code> is set, the default
+ changes to the number of cpu cores minus the number
of handler threads.
</p>
<p>