Message ID | 20210720150628.32499-1-anton.ivanov@cambridgegreys.com |
---|---|
State | Changes Requested |
Headers | show |
Series | [ovs-dev,v4,1/2] Optimize the poll loop for poll_immediate_wake() | expand |
Context | Check | Description |
---|---|---|
ovsrobot/apply-robot | success | apply and check: success |
ovsrobot/github-robot | fail | github build: failed |
On Tue, Jul 20, 2021, at 17:06, anton.ivanov@cambridgegreys.com wrote: > From: Anton Ivanov <anton.ivanov@cambridgegreys.com> > > If we are not obtaining any useful information out of the poll(), > such as is a fd busy or not, we do not need to do a poll() if > an immediate_wake() has been requested. > > This cuts out all the pollfd hash additions, forming the poll > arguments and the actual poll() after a call to > poll_immediate_wake() > > Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com> Hi Anton, Patch generally looks good to me. A few nits below. > --- > lib/poll-loop.c | 80 ++++++++++++++++++++++++++++++++----------------- > lib/timeval.c | 13 +++++++- > 2 files changed, 65 insertions(+), 28 deletions(-) > > diff --git a/lib/poll-loop.c b/lib/poll-loop.c > index 4e751ff2c..9452f6e22 100644 > --- a/lib/poll-loop.c > +++ b/lib/poll-loop.c > @@ -107,6 +107,13 @@ poll_create_node(int fd, HANDLE wevent, short int > events, const char *where) > > COVERAGE_INC(poll_create_node); > > + if (loop->timeout_when == LLONG_MIN) { > + /* There was a previous request to bail out of this poll loop. > + * There is no point to engage in yack shaving a poll hmap. > + */ > + return; > + } > + > /* Both 'fd' and 'wevent' cannot be set. */ > ovs_assert(!fd != !wevent); > > @@ -181,8 +188,21 @@ poll_wevent_wait_at(HANDLE wevent, const char *where) > void > poll_timer_wait_at(long long int msec, const char *where) > { > - long long int now = time_msec(); > + long long int now; > long long int when; > + struct poll_loop *loop = poll_loop(); > + > + /* We have an outstanding request for an immediate wake - > + * either explicit (from poll_immediate_wake()) or as a > + * result of a previous timeout calculation which gave > + * a negative timeout. > + * No point trying to recalculate the timeout. > + */ > + if (loop->timeout_when == LLONG_MIN) { > + return; > + } > + > + now = time_msec(); > > if (msec <= 0) { > /* Wake up immediately. */ > @@ -229,7 +249,7 @@ poll_timer_wait_until_at(long long int when, const > char *where) > void > poll_immediate_wake_at(const char *where) > { > - poll_timer_wait_at(0, where); > + poll_timer_wait_at(LLONG_MIN, where); > } > > /* Logs, if appropriate, that the poll loop was awakened by an event > @@ -320,49 +340,55 @@ poll_block(void) > { > struct poll_loop *loop = poll_loop(); > struct poll_node *node; > - struct pollfd *pollfds; > + struct pollfd *pollfds = NULL; > HANDLE *wevents = NULL; > int elapsed; > - int retval; > + int retval = 0; > int i; > > - /* Register fatal signal events before actually doing any real work for > - * poll_block. */ > - fatal_signal_wait(); > > if (loop->timeout_when == LLONG_MIN) { > COVERAGE_INC(poll_zero_timeout); > + } else { > + /* Register fatal signal events only if we intend to do real work for > + * poll_block. > + */ > + fatal_signal_wait(); When should we avoid registering fatal signal events? Even for immediate wake, time_poll() will execute for general upkeep (coverage + RCU), should a fatal signal handler not be installed in that case? > } > > timewarp_run(); > - pollfds = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *pollfds); > + if (loop->timeout_when != LLONG_MIN) { > + pollfds = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *pollfds); > > #ifdef _WIN32 > - wevents = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *wevents); > + wevents = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *wevents); > #endif > > - /* Populate with all the fds and events. */ > - i = 0; > - HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) { > - pollfds[i] = node->pollfd; > + /* Populate with all the fds and events. */ > + i = 0; > + HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) { > + pollfds[i] = node->pollfd; > #ifdef _WIN32 > - wevents[i] = node->wevent; > - if (node->pollfd.fd && node->wevent) { > - short int wsa_events = 0; > - if (node->pollfd.events & POLLIN) { > - wsa_events |= FD_READ | FD_ACCEPT | FD_CLOSE; > + wevents[i] = node->wevent; > + if (node->pollfd.fd && node->wevent) { > + short int wsa_events = 0; > + if (node->pollfd.events & POLLIN) { > + wsa_events |= FD_READ | FD_ACCEPT | FD_CLOSE; > + } > + if (node->pollfd.events & POLLOUT) { > + wsa_events |= FD_WRITE | FD_CONNECT | FD_CLOSE; > + } > + WSAEventSelect(node->pollfd.fd, node->wevent, wsa_events); > } > - if (node->pollfd.events & POLLOUT) { > - wsa_events |= FD_WRITE | FD_CONNECT | FD_CLOSE; > - } > - WSAEventSelect(node->pollfd.fd, node->wevent, wsa_events); > - } > #endif > - i++; > - } > + i++; > + } > > - retval = time_poll(pollfds, hmap_count(&loop->poll_nodes), wevents, > - loop->timeout_when, &elapsed); > + retval = time_poll(pollfds, hmap_count(&loop->poll_nodes), > wevents, > + loop->timeout_when, &elapsed); > + } else { > + retval = time_poll(NULL, 0, NULL, loop->timeout_when, > &elapsed); > + } > if (retval < 0) { > static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); > VLOG_ERR_RL(&rl, "poll: %s", ovs_strerror(-retval)); > diff --git a/lib/timeval.c b/lib/timeval.c > index 193c7bab1..ef09a15e0 100644 > --- a/lib/timeval.c > +++ b/lib/timeval.c > @@ -305,6 +305,7 @@ time_poll(struct pollfd *pollfds, int n_pollfds, > HANDLE *handles OVS_UNUSED, > for (;;) { > long long int now = time_msec(); > int time_left; > + retval = 0; > > if (now >= timeout_when) { > time_left = 0; > @@ -323,7 +324,14 @@ time_poll(struct pollfd *pollfds, int n_pollfds, > HANDLE *handles OVS_UNUSED, > } > > #ifndef _WIN32 > - retval = poll(pollfds, n_pollfds, time_left); > + /* If timeout_when is LLONG_MIN, we should skip the poll(). > + * We do not skip on time_left == 0, because we may have > + * ended up with time_left = 0 after a poll with valid > + * pollfds was interrupted and restarted. > + */ > + if (timeout_when != LLONG_MIN) { The _WIN32 case checks 'n_pollfds != 0' before calling. Should both be aligned? Maybe the _WIN32 should avoid the call if 'n_pollfds == 0 || timeout_when == LLONG_MIN'? > + retval = poll(pollfds, n_pollfds, time_left); > + } > if (retval < 0) { > retval = -errno; > } > @@ -331,6 +339,9 @@ time_poll(struct pollfd *pollfds, int n_pollfds, > HANDLE *handles OVS_UNUSED, > if (n_pollfds > MAXIMUM_WAIT_OBJECTS) { > VLOG_ERR("Cannot handle more than maximum wait objects\n"); > } else if (n_pollfds != 0) { > + /* If we are doing an immediate_wake shortcut n_pollfds is > + * zero, so we skip the actual call. > + */ I find this sentence hard to read. I would suggest instead: /* n_pollfds can be zero on immediate_wake. * In that case the call is not necessary. */ > retval = WaitForMultipleObjects(n_pollfds, handles, FALSE, > time_left); > } > -- > 2.20.1 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev >
On 03/08/2021 00:29, Gaëtan Rivet wrote: > On Tue, Jul 20, 2021, at 17:06, anton.ivanov@cambridgegreys.com wrote: >> From: Anton Ivanov <anton.ivanov@cambridgegreys.com> >> >> If we are not obtaining any useful information out of the poll(), >> such as is a fd busy or not, we do not need to do a poll() if >> an immediate_wake() has been requested. >> >> This cuts out all the pollfd hash additions, forming the poll >> arguments and the actual poll() after a call to >> poll_immediate_wake() >> >> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com> > Hi Anton, > > Patch generally looks good to me. A few nits below. > >> --- >> lib/poll-loop.c | 80 ++++++++++++++++++++++++++++++++----------------- >> lib/timeval.c | 13 +++++++- >> 2 files changed, 65 insertions(+), 28 deletions(-) >> >> diff --git a/lib/poll-loop.c b/lib/poll-loop.c >> index 4e751ff2c..9452f6e22 100644 >> --- a/lib/poll-loop.c >> +++ b/lib/poll-loop.c >> @@ -107,6 +107,13 @@ poll_create_node(int fd, HANDLE wevent, short int >> events, const char *where) >> >> COVERAGE_INC(poll_create_node); >> >> + if (loop->timeout_when == LLONG_MIN) { >> + /* There was a previous request to bail out of this poll loop. >> + * There is no point to engage in yack shaving a poll hmap. >> + */ >> + return; >> + } >> + >> /* Both 'fd' and 'wevent' cannot be set. */ >> ovs_assert(!fd != !wevent); >> >> @@ -181,8 +188,21 @@ poll_wevent_wait_at(HANDLE wevent, const char *where) >> void >> poll_timer_wait_at(long long int msec, const char *where) >> { >> - long long int now = time_msec(); >> + long long int now; >> long long int when; >> + struct poll_loop *loop = poll_loop(); >> + >> + /* We have an outstanding request for an immediate wake - >> + * either explicit (from poll_immediate_wake()) or as a >> + * result of a previous timeout calculation which gave >> + * a negative timeout. >> + * No point trying to recalculate the timeout. >> + */ >> + if (loop->timeout_when == LLONG_MIN) { >> + return; >> + } >> + >> + now = time_msec(); >> >> if (msec <= 0) { >> /* Wake up immediately. */ >> @@ -229,7 +249,7 @@ poll_timer_wait_until_at(long long int when, const >> char *where) >> void >> poll_immediate_wake_at(const char *where) >> { >> - poll_timer_wait_at(0, where); >> + poll_timer_wait_at(LLONG_MIN, where); >> } >> >> /* Logs, if appropriate, that the poll loop was awakened by an event >> @@ -320,49 +340,55 @@ poll_block(void) >> { >> struct poll_loop *loop = poll_loop(); >> struct poll_node *node; >> - struct pollfd *pollfds; >> + struct pollfd *pollfds = NULL; >> HANDLE *wevents = NULL; >> int elapsed; >> - int retval; >> + int retval = 0; >> int i; >> >> - /* Register fatal signal events before actually doing any real work for >> - * poll_block. */ >> - fatal_signal_wait(); >> >> if (loop->timeout_when == LLONG_MIN) { >> COVERAGE_INC(poll_zero_timeout); >> + } else { >> + /* Register fatal signal events only if we intend to do real work for >> + * poll_block. >> + */ >> + fatal_signal_wait(); > When should we avoid registering fatal signal events? > Even for immediate wake, time_poll() will execute for general > upkeep (coverage + RCU), should a fatal signal handler not > be installed in that case? The fatal signal events are handled in ALL cases in the fatal_signal_run() which is towards the end of the poll loop. fatal_singal_wait only adds the pipe to the fds. If we are not polling on that fd anyway, no point adding it either. > >> } >> >> timewarp_run(); >> - pollfds = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *pollfds); >> + if (loop->timeout_when != LLONG_MIN) { >> + pollfds = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *pollfds); >> >> #ifdef _WIN32 >> - wevents = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *wevents); >> + wevents = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *wevents); >> #endif >> >> - /* Populate with all the fds and events. */ >> - i = 0; >> - HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) { >> - pollfds[i] = node->pollfd; >> + /* Populate with all the fds and events. */ >> + i = 0; >> + HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) { >> + pollfds[i] = node->pollfd; >> #ifdef _WIN32 >> - wevents[i] = node->wevent; >> - if (node->pollfd.fd && node->wevent) { >> - short int wsa_events = 0; >> - if (node->pollfd.events & POLLIN) { >> - wsa_events |= FD_READ | FD_ACCEPT | FD_CLOSE; >> + wevents[i] = node->wevent; >> + if (node->pollfd.fd && node->wevent) { >> + short int wsa_events = 0; >> + if (node->pollfd.events & POLLIN) { >> + wsa_events |= FD_READ | FD_ACCEPT | FD_CLOSE; >> + } >> + if (node->pollfd.events & POLLOUT) { >> + wsa_events |= FD_WRITE | FD_CONNECT | FD_CLOSE; >> + } >> + WSAEventSelect(node->pollfd.fd, node->wevent, wsa_events); >> } >> - if (node->pollfd.events & POLLOUT) { >> - wsa_events |= FD_WRITE | FD_CONNECT | FD_CLOSE; >> - } >> - WSAEventSelect(node->pollfd.fd, node->wevent, wsa_events); >> - } >> #endif >> - i++; >> - } >> + i++; >> + } >> >> - retval = time_poll(pollfds, hmap_count(&loop->poll_nodes), wevents, >> - loop->timeout_when, &elapsed); >> + retval = time_poll(pollfds, hmap_count(&loop->poll_nodes), >> wevents, >> + loop->timeout_when, &elapsed); >> + } else { >> + retval = time_poll(NULL, 0, NULL, loop->timeout_when, >> &elapsed); >> + } >> if (retval < 0) { >> static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); >> VLOG_ERR_RL(&rl, "poll: %s", ovs_strerror(-retval)); >> diff --git a/lib/timeval.c b/lib/timeval.c >> index 193c7bab1..ef09a15e0 100644 >> --- a/lib/timeval.c >> +++ b/lib/timeval.c >> @@ -305,6 +305,7 @@ time_poll(struct pollfd *pollfds, int n_pollfds, >> HANDLE *handles OVS_UNUSED, >> for (;;) { >> long long int now = time_msec(); >> int time_left; >> + retval = 0; >> >> if (now >= timeout_when) { >> time_left = 0; >> @@ -323,7 +324,14 @@ time_poll(struct pollfd *pollfds, int n_pollfds, >> HANDLE *handles OVS_UNUSED, >> } >> >> #ifndef _WIN32 >> - retval = poll(pollfds, n_pollfds, time_left); >> + /* If timeout_when is LLONG_MIN, we should skip the poll(). >> + * We do not skip on time_left == 0, because we may have >> + * ended up with time_left = 0 after a poll with valid >> + * pollfds was interrupted and restarted. >> + */ >> + if (timeout_when != LLONG_MIN) { > The _WIN32 case checks 'n_pollfds != 0' before calling. > Should both be aligned? Maybe the _WIN32 should avoid the call if > 'n_pollfds == 0 || timeout_when == LLONG_MIN'? Sure. I try to avoid touching the WIN32 code as I do not have an environment to test it. > >> + retval = poll(pollfds, n_pollfds, time_left); >> + } >> if (retval < 0) { >> retval = -errno; >> } >> @@ -331,6 +339,9 @@ time_poll(struct pollfd *pollfds, int n_pollfds, >> HANDLE *handles OVS_UNUSED, >> if (n_pollfds > MAXIMUM_WAIT_OBJECTS) { >> VLOG_ERR("Cannot handle more than maximum wait objects\n"); >> } else if (n_pollfds != 0) { >> + /* If we are doing an immediate_wake shortcut n_pollfds is >> + * zero, so we skip the actual call. >> + */ > I find this sentence hard to read. > I would suggest instead: > > /* n_pollfds can be zero on immediate_wake. > * In that case the call is not necessary. */ > >> retval = WaitForMultipleObjects(n_pollfds, handles, FALSE, >> time_left); >> } >> -- >> 2.20.1 >> >> _______________________________________________ >> dev mailing list >> dev@openvswitch.org >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev >> >
diff --git a/lib/poll-loop.c b/lib/poll-loop.c index 4e751ff2c..9452f6e22 100644 --- a/lib/poll-loop.c +++ b/lib/poll-loop.c @@ -107,6 +107,13 @@ poll_create_node(int fd, HANDLE wevent, short int events, const char *where) COVERAGE_INC(poll_create_node); + if (loop->timeout_when == LLONG_MIN) { + /* There was a previous request to bail out of this poll loop. + * There is no point to engage in yack shaving a poll hmap. + */ + return; + } + /* Both 'fd' and 'wevent' cannot be set. */ ovs_assert(!fd != !wevent); @@ -181,8 +188,21 @@ poll_wevent_wait_at(HANDLE wevent, const char *where) void poll_timer_wait_at(long long int msec, const char *where) { - long long int now = time_msec(); + long long int now; long long int when; + struct poll_loop *loop = poll_loop(); + + /* We have an outstanding request for an immediate wake - + * either explicit (from poll_immediate_wake()) or as a + * result of a previous timeout calculation which gave + * a negative timeout. + * No point trying to recalculate the timeout. + */ + if (loop->timeout_when == LLONG_MIN) { + return; + } + + now = time_msec(); if (msec <= 0) { /* Wake up immediately. */ @@ -229,7 +249,7 @@ poll_timer_wait_until_at(long long int when, const char *where) void poll_immediate_wake_at(const char *where) { - poll_timer_wait_at(0, where); + poll_timer_wait_at(LLONG_MIN, where); } /* Logs, if appropriate, that the poll loop was awakened by an event @@ -320,49 +340,55 @@ poll_block(void) { struct poll_loop *loop = poll_loop(); struct poll_node *node; - struct pollfd *pollfds; + struct pollfd *pollfds = NULL; HANDLE *wevents = NULL; int elapsed; - int retval; + int retval = 0; int i; - /* Register fatal signal events before actually doing any real work for - * poll_block. */ - fatal_signal_wait(); if (loop->timeout_when == LLONG_MIN) { COVERAGE_INC(poll_zero_timeout); + } else { + /* Register fatal signal events only if we intend to do real work for + * poll_block. + */ + fatal_signal_wait(); } timewarp_run(); - pollfds = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *pollfds); + if (loop->timeout_when != LLONG_MIN) { + pollfds = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *pollfds); #ifdef _WIN32 - wevents = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *wevents); + wevents = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *wevents); #endif - /* Populate with all the fds and events. */ - i = 0; - HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) { - pollfds[i] = node->pollfd; + /* Populate with all the fds and events. */ + i = 0; + HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) { + pollfds[i] = node->pollfd; #ifdef _WIN32 - wevents[i] = node->wevent; - if (node->pollfd.fd && node->wevent) { - short int wsa_events = 0; - if (node->pollfd.events & POLLIN) { - wsa_events |= FD_READ | FD_ACCEPT | FD_CLOSE; + wevents[i] = node->wevent; + if (node->pollfd.fd && node->wevent) { + short int wsa_events = 0; + if (node->pollfd.events & POLLIN) { + wsa_events |= FD_READ | FD_ACCEPT | FD_CLOSE; + } + if (node->pollfd.events & POLLOUT) { + wsa_events |= FD_WRITE | FD_CONNECT | FD_CLOSE; + } + WSAEventSelect(node->pollfd.fd, node->wevent, wsa_events); } - if (node->pollfd.events & POLLOUT) { - wsa_events |= FD_WRITE | FD_CONNECT | FD_CLOSE; - } - WSAEventSelect(node->pollfd.fd, node->wevent, wsa_events); - } #endif - i++; - } + i++; + } - retval = time_poll(pollfds, hmap_count(&loop->poll_nodes), wevents, - loop->timeout_when, &elapsed); + retval = time_poll(pollfds, hmap_count(&loop->poll_nodes), wevents, + loop->timeout_when, &elapsed); + } else { + retval = time_poll(NULL, 0, NULL, loop->timeout_when, &elapsed); + } if (retval < 0) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); VLOG_ERR_RL(&rl, "poll: %s", ovs_strerror(-retval)); diff --git a/lib/timeval.c b/lib/timeval.c index 193c7bab1..ef09a15e0 100644 --- a/lib/timeval.c +++ b/lib/timeval.c @@ -305,6 +305,7 @@ time_poll(struct pollfd *pollfds, int n_pollfds, HANDLE *handles OVS_UNUSED, for (;;) { long long int now = time_msec(); int time_left; + retval = 0; if (now >= timeout_when) { time_left = 0; @@ -323,7 +324,14 @@ time_poll(struct pollfd *pollfds, int n_pollfds, HANDLE *handles OVS_UNUSED, } #ifndef _WIN32 - retval = poll(pollfds, n_pollfds, time_left); + /* If timeout_when is LLONG_MIN, we should skip the poll(). + * We do not skip on time_left == 0, because we may have + * ended up with time_left = 0 after a poll with valid + * pollfds was interrupted and restarted. + */ + if (timeout_when != LLONG_MIN) { + retval = poll(pollfds, n_pollfds, time_left); + } if (retval < 0) { retval = -errno; } @@ -331,6 +339,9 @@ time_poll(struct pollfd *pollfds, int n_pollfds, HANDLE *handles OVS_UNUSED, if (n_pollfds > MAXIMUM_WAIT_OBJECTS) { VLOG_ERR("Cannot handle more than maximum wait objects\n"); } else if (n_pollfds != 0) { + /* If we are doing an immediate_wake shortcut n_pollfds is + * zero, so we skip the actual call. + */ retval = WaitForMultipleObjects(n_pollfds, handles, FALSE, time_left); }