@@ -167,11 +167,10 @@ struct dpif_windows_vport_sock {
#endif
struct dpif_handler {
- struct dpif_channel *channels;/* Array of channels for each handler. */
- struct epoll_event *epoll_events;
+ struct dpif_channel channels;
+ struct epoll_event epoll_events;
int epoll_fd; /* epoll fd that includes channel socks. */
int n_events; /* Num events returned by epoll_wait(). */
- int event_offset; /* Offset into 'epoll_events'. */
#ifdef _WIN32
/* Pool of sockets. */
@@ -190,16 +189,16 @@ struct dpif_netlink {
struct fat_rwlock upcall_lock;
struct dpif_handler *handlers;
uint32_t n_handlers; /* Num of upcall handlers. */
- int uc_array_size; /* Size of 'handler->channels' and */
- /* 'handler->epoll_events'. */
/* Change notification. */
struct nl_sock *port_notifier; /* vport multicast group subscriber. */
bool refresh_channels;
};
+/*
static void report_loss(struct dpif_netlink *, struct dpif_channel *,
uint32_t ch_idx, uint32_t handler_id);
+*/
static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5);
@@ -508,8 +507,7 @@ vport_socksp_to_pids(struct nl_sock **socksp, uint32_t n_socks)
/* Given the port number 'port_idx', extracts the pids of netlink sockets
* associated to the port and assigns it to 'upcall_pids'. */
static bool
-vport_get_pids(struct dpif_netlink *dpif, uint32_t port_idx,
- uint32_t **upcall_pids)
+vport_get_pids(struct dpif_netlink *dpif, uint32_t **upcall_pids)
{
uint32_t *pids;
size_t i;
@@ -517,7 +515,7 @@ vport_get_pids(struct dpif_netlink *dpif, uint32_t port_idx,
/* Since the nl_sock can only be assigned in either all
* or none "dpif->handlers" channels, the following check
* would suffice. */
- if (!dpif->handlers[0].channels[port_idx].sock) {
+ if (!dpif->handlers[0].channels.sock) {
return false;
}
ovs_assert(!WINDOWS || dpif->n_handlers <= 1);
@@ -525,7 +523,7 @@ vport_get_pids(struct dpif_netlink *dpif, uint32_t port_idx,
pids = xzalloc(dpif->n_handlers * sizeof *pids);
for (i = 0; i < dpif->n_handlers; i++) {
- pids[i] = nl_sock_pid(dpif->handlers[i].channels[port_idx].sock);
+ pids[i] = nl_sock_pid(dpif->handlers[i].channels.sock);
}
*upcall_pids = pids;
@@ -542,37 +540,17 @@ vport_add_channels(struct dpif_netlink *dpif, odp_port_t port_no,
size_t i, j;
int error;
- if (dpif->handlers == NULL) {
+ /* Since the sock can only be assigned in either all or none
+ * of "dpif->handlers" channels, the following check would
+ * suffice. */
+ if (dpif->handlers == NULL || dpif->handlers[0].channels.sock) {
return 0;
}
- /* We assume that the datapath densely chooses port numbers, which can
- * therefore be used as an index into 'channels' and 'epoll_events' of
- * 'dpif->handler'. */
- if (port_idx >= dpif->uc_array_size) {
- uint32_t new_size = port_idx + 1;
-
- if (new_size > MAX_PORTS) {
- VLOG_WARN_RL(&error_rl, "%s: datapath port %"PRIu32" too big",
- dpif_name(&dpif->dpif), port_no);
- return EFBIG;
- }
-
- for (i = 0; i < dpif->n_handlers; i++) {
- struct dpif_handler *handler = &dpif->handlers[i];
-
- handler->channels = xrealloc(handler->channels,
- new_size * sizeof *handler->channels);
-
- for (j = dpif->uc_array_size; j < new_size; j++) {
- handler->channels[j].sock = NULL;
- }
-
- handler->epoll_events = xrealloc(handler->epoll_events,
- new_size * sizeof *handler->epoll_events);
+ for (i = 0; i < dpif->n_handlers; i++) {
+ struct dpif_handler *handler = &dpif->handlers[i];
- }
- dpif->uc_array_size = new_size;
+ handler->channels.sock = NULL;
}
memset(&event, 0, sizeof event);
@@ -589,8 +567,8 @@ vport_add_channels(struct dpif_netlink *dpif, odp_port_t port_no,
goto error;
}
#endif
- dpif->handlers[i].channels[port_idx].sock = socksp[i];
- dpif->handlers[i].channels[port_idx].last_poll = LLONG_MIN;
+ dpif->handlers[i].channels.sock = socksp[i];
+ dpif->handlers[i].channels.last_poll = LLONG_MIN;
}
return 0;
@@ -601,26 +579,25 @@ error:
epoll_ctl(dpif->handlers[j].epoll_fd, EPOLL_CTL_DEL,
nl_sock_fd(socksp[j]), NULL);
#endif
- dpif->handlers[j].channels[port_idx].sock = NULL;
+ dpif->handlers[j].channels.sock = NULL;
}
return error;
}
static void
-vport_del_channels(struct dpif_netlink *dpif, odp_port_t port_no)
+vport_del_channels(struct dpif_netlink *dpif)
{
- uint32_t port_idx = odp_to_u32(port_no);
size_t i;
- if (!dpif->handlers || port_idx >= dpif->uc_array_size) {
+ if (!dpif->handlers) {
return;
}
/* Since the sock can only be assigned in either all or none
* of "dpif->handlers" channels, the following check would
* suffice. */
- if (!dpif->handlers[0].channels[port_idx].sock) {
+ if (!dpif->handlers[0].channels.sock) {
return;
}
@@ -628,11 +605,10 @@ vport_del_channels(struct dpif_netlink *dpif, odp_port_t port_no)
struct dpif_handler *handler = &dpif->handlers[i];
#ifndef _WIN32
epoll_ctl(handler->epoll_fd, EPOLL_CTL_DEL,
- nl_sock_fd(handler->channels[port_idx].sock), NULL);
- nl_sock_destroy(handler->channels[port_idx].sock);
+ nl_sock_fd(handler->channels.sock), NULL);
+ nl_sock_destroy(handler->channels.sock);
#endif
- handler->channels[port_idx].sock = NULL;
- handler->event_offset = handler->n_events = 0;
+ handler->channels.sock = NULL;
}
}
@@ -646,41 +622,33 @@ destroy_all_channels(struct dpif_netlink *dpif)
return;
}
- for (i = 0; i < dpif->uc_array_size; i++ ) {
- struct dpif_netlink_vport vport_request;
- uint32_t upcall_pids = 0;
-
- /* Since the sock can only be assigned in either all or none
- * of "dpif->handlers" channels, the following check would
- * suffice. */
- if (!dpif->handlers[0].channels[i].sock) {
- continue;
- }
+ struct dpif_netlink_vport vport_request;
+ uint32_t upcall_pids = 0;
+ /* Since the sock can only be assigned in either all or none
+ * of "dpif->handlers" channels, the following check would
+ * suffice. */
+ if (dpif->handlers[0].channels.sock) {
/* Turn off upcalls. */
dpif_netlink_vport_init(&vport_request);
vport_request.cmd = OVS_VPORT_CMD_SET;
vport_request.dp_ifindex = dpif->dp_ifindex;
- vport_request.port_no = u32_to_odp(i);
vport_request.n_upcall_pids = 1;
vport_request.upcall_pids = &upcall_pids;
dpif_netlink_vport_transact(&vport_request, NULL, NULL);
- vport_del_channels(dpif, u32_to_odp(i));
+ vport_del_channels(dpif);
}
for (i = 0; i < dpif->n_handlers; i++) {
struct dpif_handler *handler = &dpif->handlers[i];
dpif_netlink_handler_uninit(handler);
- free(handler->epoll_events);
- free(handler->channels);
}
free(dpif->handlers);
dpif->handlers = NULL;
dpif->n_handlers = 0;
- dpif->uc_array_size = 0;
}
static void
@@ -1033,7 +1001,7 @@ dpif_netlink_port_del__(struct dpif_netlink *dpif, odp_port_t port_no)
#endif
error = dpif_netlink_vport_transact(&vport, NULL, NULL);
- vport_del_channels(dpif, port_no);
+ vport_del_channels(dpif);
if (!error && !ovs_tunnels_out_of_tree) {
error = dpif_netlink_rtnl_port_destroy(dpif_port.name, dpif_port.type);
@@ -1110,25 +1078,22 @@ dpif_netlink_port_query_by_name(const struct dpif *dpif_, const char *devname,
}
static uint32_t
-dpif_netlink_port_get_pid__(const struct dpif_netlink *dpif,
- odp_port_t port_no, uint32_t hash)
+dpif_netlink_port_get_pid__(const struct dpif_netlink *dpif, uint32_t hash)
OVS_REQ_RDLOCK(dpif->upcall_lock)
{
- uint32_t port_idx = odp_to_u32(port_no);
uint32_t pid = 0;
- if (dpif->handlers && dpif->uc_array_size > 0) {
+ if (dpif->handlers) {
/* The ODPP_NONE "reserved" port number uses the "ovs-system"'s
* channel, since it is not heavily loaded. */
- uint32_t idx = port_idx >= dpif->uc_array_size ? 0 : port_idx;
struct dpif_handler *h = &dpif->handlers[hash % dpif->n_handlers];
/* Needs to check in case the socket pointer is changed in between
* the holding of upcall_lock. A known case happens when the main
* thread deletes the vport while the handler thread is handling
* the upcall from that port. */
- if (h->channels[idx].sock) {
- pid = nl_sock_pid(h->channels[idx].sock);
+ if (h->channels.sock) {
+ pid = nl_sock_pid(h->channels.sock);
}
}
@@ -1136,14 +1101,14 @@ dpif_netlink_port_get_pid__(const struct dpif_netlink *dpif,
}
static uint32_t
-dpif_netlink_port_get_pid(const struct dpif *dpif_, odp_port_t port_no,
- uint32_t hash)
+dpif_netlink_port_get_pid(const struct dpif *dpif_,
+ odp_port_t OVS_UNUSED port_no, uint32_t hash)
{
const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
uint32_t ret;
fat_rwlock_rdlock(&dpif->upcall_lock);
- ret = dpif_netlink_port_get_pid__(dpif, port_no, hash);
+ ret = dpif_netlink_port_get_pid__(dpif, hash);
fat_rwlock_unlock(&dpif->upcall_lock);
return ret;
@@ -2316,9 +2281,7 @@ static int
dpif_netlink_refresh_channels(struct dpif_netlink *dpif, uint32_t n_handlers)
OVS_REQ_WRLOCK(dpif->upcall_lock)
{
- unsigned long int *keep_channels;
struct dpif_netlink_vport vport;
- size_t keep_channels_nbits;
struct nl_dump dump;
uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
struct ofpbuf buf;
@@ -2355,21 +2318,16 @@ dpif_netlink_refresh_channels(struct dpif_netlink *dpif, uint32_t n_handlers)
for (i = 0; i < n_handlers; i++) {
struct dpif_handler *handler = &dpif->handlers[i];
- handler->event_offset = handler->n_events = 0;
+ handler->n_events = 0;
}
- keep_channels_nbits = dpif->uc_array_size;
- keep_channels = bitmap_allocate(keep_channels_nbits);
-
ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
dpif_netlink_port_dump_start__(dpif, &dump);
while (!dpif_netlink_port_dump_next__(dpif, &dump, &vport, &buf)) {
- uint32_t port_no = odp_to_u32(vport.port_no);
uint32_t *upcall_pids = NULL;
int error;
- if (port_no >= dpif->uc_array_size
- || !vport_get_pids(dpif, port_no, &upcall_pids)) {
+ if (!vport_get_pids(dpif, &upcall_pids)) {
struct nl_sock **socksp = vport_create_socksp(dpif, &error);
if (!socksp) {
@@ -2418,27 +2376,16 @@ dpif_netlink_refresh_channels(struct dpif_netlink *dpif, uint32_t n_handlers)
}
}
- if (port_no < keep_channels_nbits) {
- bitmap_set1(keep_channels, port_no);
- }
free(upcall_pids);
continue;
error:
free(upcall_pids);
- vport_del_channels(dpif, vport.port_no);
+ vport_del_channels(dpif);
}
nl_dump_done(&dump);
ofpbuf_uninit(&buf);
- /* Discard any saved channels that we didn't reuse. */
- for (i = 0; i < keep_channels_nbits; i++) {
- if (!bitmap_is_set(keep_channels, i)) {
- vport_del_channels(dpif, u32_to_odp(i));
- }
- }
- free(keep_channels);
-
return retval;
}
@@ -2664,14 +2611,12 @@ dpif_netlink_recv__(struct dpif_netlink *dpif, uint32_t handler_id,
}
handler = &dpif->handlers[handler_id];
- if (handler->event_offset >= handler->n_events) {
+ if (handler->n_events) {
int retval;
- handler->event_offset = handler->n_events = 0;
-
do {
- retval = epoll_wait(handler->epoll_fd, handler->epoll_events,
- dpif->uc_array_size, 0);
+ retval = epoll_wait(handler->epoll_fd, &handler->epoll_events,
+ 1, 0);
} while (retval < 0 && errno == EINTR);
if (retval < 0) {
@@ -2682,11 +2627,8 @@ dpif_netlink_recv__(struct dpif_netlink *dpif, uint32_t handler_id,
}
}
- while (handler->event_offset < handler->n_events) {
- int idx = handler->epoll_events[handler->event_offset].data.u32;
- struct dpif_channel *ch = &dpif->handlers[handler_id].channels[idx];
-
- handler->event_offset++;
+ while (!handler->n_events) {
+ struct dpif_channel *ch = &dpif->handlers[handler_id].channels;
for (;;) {
int dp_ifindex;
@@ -2702,7 +2644,10 @@ dpif_netlink_recv__(struct dpif_netlink *dpif, uint32_t handler_id,
* packets that the buffer overflowed. Try again
* immediately because there's almost certainly a packet
* waiting for us. */
+ /* FIXME
+ * idx is no longer available
report_loss(dpif, ch, idx, handler_id);
+ */
continue;
}
@@ -2786,15 +2731,11 @@ dpif_netlink_recv_purge__(struct dpif_netlink *dpif)
OVS_REQ_WRLOCK(dpif->upcall_lock)
{
if (dpif->handlers) {
- size_t i, j;
-
- for (i = 0; i < dpif->uc_array_size; i++ ) {
- if (!dpif->handlers[0].channels[i].sock) {
- continue;
- }
+ size_t i;
- for (j = 0; j < dpif->n_handlers; j++) {
- nl_sock_drain(dpif->handlers[j].channels[i].sock);
+ if (dpif->handlers[0].channels.sock) {
+ for (i = 0; i < dpif->n_handlers; i++) {
+ nl_sock_drain(dpif->handlers[i].channels.sock);
}
}
}
@@ -3599,6 +3540,7 @@ dpif_netlink_flow_get_stats(const struct dpif_netlink_flow *flow,
/* Logs information about a packet that was recently lost in 'ch' (in
* 'dpif_'). */
+/* FIXME - unused for now
static void
report_loss(struct dpif_netlink *dpif, struct dpif_channel *ch, uint32_t ch_idx,
uint32_t handler_id)
@@ -3620,3 +3562,4 @@ report_loss(struct dpif_netlink *dpif, struct dpif_channel *ch, uint32_t ch_idx,
dpif_name(&dpif->dpif), ch_idx, handler_id);
ds_destroy(&s);
}
+*/
When using the kernel datapath OVS allocates a pool of sockets to handle netlink events. The number of sockets is: ports * n-handler-threads. n-handler-threads is user configurable but defaults to the number of cores. On machines with lot of CPUs and ports, the number of sockets easily hits the process file descriptor limit, which is 65536, and will cause ovs-vswitchd to abort. Change the number of allocated sockets to just n-handler-threads which, if set slighty greater than the number of cores, is enough to handle to handler the netlink events. Replace the struct dpif_channel array with a single dpif_channel instance and edit the code accordingly. By doing so, the port idx information is lost in upcall event code, so the call to report_loss() must be commented, as it already is in the Windows code path. Signed-off-by: Matteo Croce <mcroce@redhat.com> --- lib/dpif-netlink.c | 167 ++++++++++++++++++----------------------------------- 1 file changed, 55 insertions(+), 112 deletions(-)