[ovs-dev,v3,2/3] EPOLL Support for persistent fds
diff mbox series

Message ID 20200226115239.3192-2-anton.ivanov@cambridgegreys.com
State New
Headers show
Series
  • [ovs-dev,v3,1/3] Add file descriptor persistence where possible
Related show

Commit Message

Anton Ivanov Feb. 26, 2020, 11:52 a.m. UTC
From: Anton Ivanov <anton.ivanov@cambridgegreys.com>

Switches FDs that are marked as persistent in persistent
poll loops to use epoll instead of poll.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 lib/poll-loop.c | 103 +++++++++++++++++++++++++++++++++++++++++++++++-
 lib/timeval.c   |  86 ++++++++++++++++++++++++++++++++++++++++
 lib/timeval.h   |   7 ++++
 3 files changed, 194 insertions(+), 2 deletions(-)

Comments

0-day Robot Feb. 26, 2020, 12:09 p.m. UTC | #1
Bleep bloop.  Greetings Anton Ivanov, I am a robot and I have tried out your patch.
Thanks for your contribution.

I encountered some errors that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Line has trailing whitespace
#69 FILE: lib/poll-loop.c:87:
        ret |= (EPOLLHUP | EPOLLRDHUP); 

WARNING: Line has trailing whitespace
#87 FILE: lib/poll-loop.c:105:
        ret |= POLLHUP; 

WARNING: Line is 100 characters long (recommended limit is 79)
#172 FILE: lib/poll-loop.c:560:
        (struct epoll_event *) &loop->epoll_events, MAX_EPOLL_EVENTS, loop->timeout_when, &elapsed);

WARNING: Line is 84 characters long (recommended limit is 79)
#186 FILE: lib/poll-loop.c:574:
            pollfds[i].revents = epoll_to_poll_events(loop->epoll_events[i].events);

Lines checked: 355, Warnings: 4, Errors: 0


Please check this out.  If you feel there has been an error, please email aconole@redhat.com

Thanks,
0-day Robot
Anton Ivanov Feb. 26, 2020, 3:22 p.m. UTC | #2
I tried to emulate poll as closely as possible this time, but forgot to account correctly for the differences between epoll and poll in how POLLOUT is handled.

This one is broken - it will be fixed in the next version.

A.

On 26/02/2020 11:52, anton.ivanov@cambridgegreys.com wrote:
> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> 
> Switches FDs which are marked as persistent in persistent
> poll loops to use epoll instead of poll
> 
> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> ---
>   lib/poll-loop.c | 103 +++++++++++++++++++++++++++++++++++++++++++++++-
>   lib/timeval.c   |  86 ++++++++++++++++++++++++++++++++++++++++
>   lib/timeval.h   |   7 ++++
>   3 files changed, 194 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/poll-loop.c b/lib/poll-loop.c
> index 68e44eba2..5ce5977d5 100644
> --- a/lib/poll-loop.c
> +++ b/lib/poll-loop.c
> @@ -38,6 +38,14 @@ VLOG_DEFINE_THIS_MODULE(poll_loop);
>   COVERAGE_DEFINE(poll_create_node);
>   COVERAGE_DEFINE(poll_zero_timeout);
>   
> +#define MAX_EPOLL_EVENTS 64
> +
> +#ifdef __linux__
> +#define USE_EPOLL
> +#include <unistd.h>
> +#include <sys/epoll.h>
> +#endif
> +
>   struct poll_node {
>       struct hmap_node hmap_node;
>       struct pollfd pollfd;       /* Events to pass to time_poll(). */
> @@ -45,7 +53,6 @@ struct poll_node {
>       const char *where;          /* Where poll_node was created. */
>       bool valid;                 /* Marked invalid if we got a HUP/NVAL from poll */
>   };
> -
>   struct poll_loop {
>       /* All active poll waiters. */
>       struct hmap poll_nodes;
> @@ -55,10 +62,52 @@ struct poll_loop {
>       long long int timeout_when; /* In msecs as returned by time_msec(). */
>       const char *timeout_where;  /* Where 'timeout_when' was set. */
>       bool persist;
> +#ifdef USE_EPOLL
> +    int epoll_fd;
> +    struct epoll_event epoll_events[MAX_EPOLL_EVENTS];
> +#endif
>   };
>   
>   static struct poll_loop *poll_loop(void);
>   
> +#ifdef USE_EPOLL
> +static inline int poll_to_epoll_events(short events) {
> +    int ret = 0;
> +    if (events & POLLIN) {
> +        ret |= EPOLLIN;
> +    }
> +    if (events & POLLOUT) {
> +        ret |= EPOLLOUT;
> +    }
> +    /* epoll always listens on ERR, no need to map,
> +     * epoll distinguishes between HUP and RDHUP,
> +     * they are same in poll, epoll has no NVAL
> +     */
> +    if (events & (POLLHUP | POLLNVAL)) {
> +        ret |= (EPOLLHUP | EPOLLRDHUP);
> +    }
> +    return ret;
> +}
> +
> +static inline short epoll_to_poll_events(int events) {
> +    short ret = 0;
> +    if (events & EPOLLIN) {
> +        ret |= POLLIN;
> +    }
> +    if (events & EPOLLOUT) {
> +        ret |= POLLOUT;
> +    }
> +    /* epoll always listens on ERR, no need to map,
> +     * epoll distinguishes between HUP and RDHUP,
> +     * they are same in poll, epoll has no NVAL
> +     */
> +    if (events & (EPOLLHUP | EPOLLRDHUP)) {
> +        ret |= POLLHUP;
> +    }
> +    return ret;
> +}
> +#endif
> +
>   /* Look up the node with same fd or wevent. */
>   static struct poll_node *
>   find_poll_node(struct poll_loop *loop, int fd, HANDLE wevent)
> @@ -106,6 +155,9 @@ static struct poll_node
>   {
>       struct poll_loop *loop = poll_loop();
>       struct poll_node *node;
> +#ifdef USE_EPOLL
> +    struct epoll_event event;
> +#endif
>   
>       COVERAGE_INC(poll_create_node);
>   
> @@ -115,6 +167,13 @@ static struct poll_node
>       /* Check for duplicate.  If found, "or" the events. */
>       node = find_poll_node(loop, fd, wevent);
>       if (node) {
> +#ifdef USE_EPOLL
> +        if (loop->persist && (node->pollfd.events != events)) {
> +            event.events = poll_to_epoll_events(node->pollfd.events | events);
> +            event.data.ptr = node;
> +            epoll_ctl(loop->epoll_fd, EPOLL_CTL_MOD, fd, &event);
> +        }
> +#endif
>           node->pollfd.events |= events;
>       } else {
>           node = xzalloc(sizeof *node);
> @@ -130,6 +189,13 @@ static struct poll_node
>           node->wevent = wevent;
>           node->where = where;
>           node->valid = true;
> +#ifdef USE_EPOLL
> +        if (loop->persist) {
> +            event.events = poll_to_epoll_events(events);
> +            event.data.ptr = node;
> +            epoll_ctl(loop->epoll_fd, EPOLL_CTL_ADD, fd, &event);
> +        }
> +#endif
>       }
>       return node;
>   }
> @@ -186,6 +252,11 @@ poll_fd_deregister_at(int fd, const char *where) {
>   
>       node = find_poll_node(loop, fd, 0);
>       if (node) {
> +#ifdef USE_EPOLL
> +        if (loop->persist) {
> +            epoll_ctl(loop->epoll_fd, EPOLL_CTL_DEL, node->pollfd.fd, NULL);
> +        }
> +#endif
>           hmap_remove(&loop->poll_nodes, &node->hmap_node);
>       }
>   }
> @@ -344,6 +415,11 @@ free_poll_nodes(struct poll_loop *loop)
>   
>       HMAP_FOR_EACH_SAFE (node, next, hmap_node, &loop->poll_nodes) {
>           hmap_remove(&loop->poll_nodes, &node->hmap_node);
> +#ifdef USE_EPOLL
> +        if (loop->persist) {
> +            epoll_ctl(loop->epoll_fd, EPOLL_CTL_DEL, node->pollfd.fd, NULL);
> +        }
> +#endif
>   #ifdef _WIN32
>           if (node->wevent && node->pollfd.fd) {
>               WSAEventSelect(node->pollfd.fd, NULL, 0);
> @@ -455,6 +531,7 @@ persist_poll_block(struct poll_loop *loop)
>   
>       /* Populate with all the fds and events. */
>       counter = 0;
> +#ifndef USE_EPOLL
>       HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) {
>           if (node->pollfd.events && node->valid) {
>               pollfds[counter] = node->pollfd;
> @@ -478,6 +555,12 @@ persist_poll_block(struct poll_loop *loop)
>   
>       retval = time_poll(pollfds, hmap_count(&loop->poll_nodes), wevents,
>                          loop->timeout_when, &elapsed);
> +#else
> +    retval = time_epoll_wait(loop->epoll_fd,
> +        (struct epoll_event *) &loop->epoll_events, MAX_EPOLL_EVENTS, loop->timeout_when, &elapsed);
> +    counter = retval;
> +#endif
> +
>       if (retval < 0) {
>           static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
>           VLOG_ERR_RL(&rl, "poll: %s", ovs_strerror(-retval));
> @@ -485,7 +568,13 @@ persist_poll_block(struct poll_loop *loop)
>           log_wakeup(loop->timeout_where, NULL, elapsed);
>       } else {
>           for (i = 0; i < counter; i++) {
> +#ifdef USE_EPOLL
> +            node = loop->epoll_events[i].data.ptr;
> +            pollfds[i] = node->pollfd;
> +            pollfds[i].revents = epoll_to_poll_events(loop->epoll_events[i].events);
> +#else
>               node = find_poll_node(loop, pollfds[i].fd, 0);
> +#endif
>               if (!node) {
>                   VLOG_FATAL("poll: persistence state corrupted, no hash entry for %d", pollfds[i].fd);
>               }
> @@ -546,12 +635,19 @@ free_poll_loop(void *loop_)
>       free_poll_nodes(loop);
>       hmap_destroy(&loop->poll_nodes);
>       free(loop);
> +#ifdef USE_EPOLL
> +    if (loop->persist) {
> +        close(loop->epoll_fd);
> +    }
> +#endif
>   }
>   
>   void poll_enable_persist(void) {
>       struct poll_loop *loop = poll_loop();
> -
>       loop->persist = true;
> +#ifdef USE_EPOLL
> +    loop->epoll_fd = epoll_create(MAX_EPOLL_EVENTS);
> +#endif
>   }
>   
>   static struct poll_loop *
> @@ -573,6 +669,9 @@ poll_loop(void)
>           hmap_init(&loop->poll_nodes);
>           xpthread_setspecific(key, loop);
>           loop->persist = false;
> +#ifdef USE_EPOLL
> +        loop->epoll_fd = -1;
> +#endif
>       }
>       return loop;
>   }
> diff --git a/lib/timeval.c b/lib/timeval.c
> index 193c7bab1..6b1f1cf5a 100644
> --- a/lib/timeval.c
> +++ b/lib/timeval.c
> @@ -38,6 +38,9 @@
>   #include "unixctl.h"
>   #include "util.h"
>   #include "openvswitch/vlog.h"
> +#ifdef __linux__
> +#include <sys/epoll.h>
> +#endif
>   
>   VLOG_DEFINE_THIS_MODULE(timeval);
>   
> @@ -270,6 +273,89 @@ time_alarm(unsigned int secs)
>       deadline = now < LLONG_MAX - msecs ? now + msecs : LLONG_MAX;
>   }
>   
> +#ifdef __linux__
> +
> +/* Like epoll_wait(), except:
> + *
> + *      - The timeout is specified as an absolute time, as defined by
> + *        time_msec(), instead of a duration.
> + *
> + *      - On error, returns a negative error code (instead of setting errno).
> + *
> + *      - If interrupted by a signal, retries automatically until the original
> + *        timeout is reached.  (Because of this property, this function will
> + *        never return -EINTR.)
> + *
> + * Stores the number of milliseconds elapsed during poll in '*elapsed'. */
> +int
> +time_epoll_wait(int epoll_fd, struct epoll_event *events, int max,
> +          long long int timeout_when, int *elapsed)
> +{
> +    long long int *last_wakeup = last_wakeup_get();
> +    long long int start;
> +    bool quiescent;
> +    int retval = 0;
> +
> +    time_init();
> +    coverage_clear();
> +    coverage_run();
> +    if (*last_wakeup && !thread_is_pmd()) {
> +        log_poll_interval(*last_wakeup);
> +    }
> +    start = time_msec();
> +
> +    timeout_when = MIN(timeout_when, deadline);
> +    quiescent = ovsrcu_is_quiescent();
> +
> +    for (;;) {
> +        long long int now = time_msec();
> +        int time_left;
> +
> +        if (now >= timeout_when) {
> +            time_left = 0;
> +        } else if ((unsigned long long int) timeout_when - now > INT_MAX) {
> +            time_left = INT_MAX;
> +        } else {
> +            time_left = timeout_when - now;
> +        }
> +
> +        if (!quiescent) {
> +            if (!time_left) {
> +                ovsrcu_quiesce();
> +            } else {
> +                ovsrcu_quiesce_start();
> +            }
> +        }
> +
> +        retval = epoll_wait(epoll_fd, events, max, time_left);
> +        if (retval < 0) {
> +            retval = -errno;
> +        }
> +
> +        if (!quiescent && time_left) {
> +            ovsrcu_quiesce_end();
> +        }
> +
> +        if (deadline <= time_msec()) {
> +            fatal_signal_handler(SIGALRM);
> +            if (retval < 0) {
> +                retval = 0;
> +            }
> +            break;
> +        }
> +
> +        if (retval != -EINTR) {
> +            break;
> +        }
> +    }
> +    *last_wakeup = time_msec();
> +    refresh_rusage();
> +    *elapsed = *last_wakeup - start;
> +    return retval;
> +}
> +#endif
> +
> +
>   /* Like poll(), except:
>    *
>    *      - The timeout is specified as an absolute time, as defined by
> diff --git a/lib/timeval.h b/lib/timeval.h
> index 502f703d4..d640eab17 100644
> --- a/lib/timeval.h
> +++ b/lib/timeval.h
> @@ -20,6 +20,9 @@
>   #include <time.h>
>   #include "openvswitch/type-props.h"
>   #include "util.h"
> +#ifdef __linux__
> +#include <sys/epoll.h>
> +#endif
>   
>   #ifdef  __cplusplus
>   extern "C" {
> @@ -59,6 +62,10 @@ long long int time_wall_usec(void);
>   void time_timespec(struct timespec *);
>   void time_wall_timespec(struct timespec *);
>   void time_alarm(unsigned int secs);
> +#ifdef __linux__
> +int time_epoll_wait(int epoll_fd, struct epoll_event *events, int max,
> +          long long int timeout_when, int *elapsed);
> +#endif
>   int time_poll(struct pollfd *, int n_pollfds, HANDLE *handles,
>                 long long int timeout_when, int *elapsed);
>   
>

Patch
diff mbox series

diff --git a/lib/poll-loop.c b/lib/poll-loop.c
index 68e44eba2..5ce5977d5 100644
--- a/lib/poll-loop.c
+++ b/lib/poll-loop.c
@@ -38,6 +38,14 @@  VLOG_DEFINE_THIS_MODULE(poll_loop);
 COVERAGE_DEFINE(poll_create_node);
 COVERAGE_DEFINE(poll_zero_timeout);
 
+#define MAX_EPOLL_EVENTS 64
+
+#ifdef __linux__
+#define USE_EPOLL
+#include <unistd.h>
+#include <sys/epoll.h>
+#endif
+
 struct poll_node {
     struct hmap_node hmap_node;
     struct pollfd pollfd;       /* Events to pass to time_poll(). */
@@ -45,7 +53,6 @@  struct poll_node {
     const char *where;          /* Where poll_node was created. */
     bool valid;                 /* Marked invalid if we got a HUP/NVAL from poll */
 };
-
 struct poll_loop {
     /* All active poll waiters. */
     struct hmap poll_nodes;
@@ -55,10 +62,52 @@  struct poll_loop {
     long long int timeout_when; /* In msecs as returned by time_msec(). */
     const char *timeout_where;  /* Where 'timeout_when' was set. */
     bool persist;
+#ifdef USE_EPOLL
+    int epoll_fd;
+    struct epoll_event epoll_events[MAX_EPOLL_EVENTS];
+#endif
 };
 
 static struct poll_loop *poll_loop(void);
 
+#ifdef USE_EPOLL
+static inline int poll_to_epoll_events(short events) {
+    int ret = 0;
+    if (events & POLLIN) {
+        ret |= EPOLLIN;
+    }
+    if (events & POLLOUT) {
+        ret |= EPOLLOUT;
+    }
+    /* epoll always listens on ERR, no need to map,
+     * epoll distinguishes between HUP and RDHUP,
+     * they are same in poll, epoll has no NVAL
+     */
+    if (events & (POLLHUP | POLLNVAL)) {
+        ret |= (EPOLLHUP | EPOLLRDHUP); 
+    }
+    return ret;
+}
+
+static inline short epoll_to_poll_events(int events) {
+    short ret = 0;
+    if (events & EPOLLIN) {
+        ret |= POLLIN;
+    }
+    if (events & EPOLLOUT) {
+        ret |= POLLOUT;
+    }
+    /* epoll always listens on ERR, no need to map,
+     * epoll distinguishes between HUP and RDHUP,
+     * they are same in poll, epoll has no NVAL
+     */
+    if (events & (EPOLLHUP | EPOLLRDHUP)) {
+        ret |= POLLHUP; 
+    }
+    return ret;
+}
+#endif
+
 /* Look up the node with same fd or wevent. */
 static struct poll_node *
 find_poll_node(struct poll_loop *loop, int fd, HANDLE wevent)
@@ -106,6 +155,9 @@  static struct poll_node
 {
     struct poll_loop *loop = poll_loop();
     struct poll_node *node;
+#ifdef USE_EPOLL
+    struct epoll_event event;
+#endif
 
     COVERAGE_INC(poll_create_node);
 
@@ -115,6 +167,13 @@  static struct poll_node
     /* Check for duplicate.  If found, "or" the events. */
     node = find_poll_node(loop, fd, wevent);
     if (node) {
+#ifdef USE_EPOLL
+        if (loop->persist && (node->pollfd.events != events)) {
+            event.events = poll_to_epoll_events(node->pollfd.events | events);
+            event.data.ptr = node;
+            epoll_ctl(loop->epoll_fd, EPOLL_CTL_MOD, fd, &event);
+        }
+#endif
         node->pollfd.events |= events;
     } else {
         node = xzalloc(sizeof *node);
@@ -130,6 +189,13 @@  static struct poll_node
         node->wevent = wevent;
         node->where = where;
         node->valid = true;
+#ifdef USE_EPOLL
+        if (loop->persist) {
+            event.events = poll_to_epoll_events(events);
+            event.data.ptr = node;
+            epoll_ctl(loop->epoll_fd, EPOLL_CTL_ADD, fd, &event);
+        }
+#endif
     }
     return node;
 }
@@ -186,6 +252,11 @@  poll_fd_deregister_at(int fd, const char *where) {
 
     node = find_poll_node(loop, fd, 0);
     if (node) {
+#ifdef USE_EPOLL
+        if (loop->persist) {
+            epoll_ctl(loop->epoll_fd, EPOLL_CTL_DEL, node->pollfd.fd, NULL);
+        }
+#endif
         hmap_remove(&loop->poll_nodes, &node->hmap_node);
     }
 }
@@ -344,6 +415,11 @@  free_poll_nodes(struct poll_loop *loop)
 
     HMAP_FOR_EACH_SAFE (node, next, hmap_node, &loop->poll_nodes) {
         hmap_remove(&loop->poll_nodes, &node->hmap_node);
+#ifdef USE_EPOLL
+        if (loop->persist) {
+            epoll_ctl(loop->epoll_fd, EPOLL_CTL_DEL, node->pollfd.fd, NULL);
+        }
+#endif
 #ifdef _WIN32
         if (node->wevent && node->pollfd.fd) {
             WSAEventSelect(node->pollfd.fd, NULL, 0);
@@ -455,6 +531,7 @@  persist_poll_block(struct poll_loop *loop)
 
     /* Populate with all the fds and events. */
     counter = 0;
+#ifndef USE_EPOLL
     HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) {
         if (node->pollfd.events && node->valid) {
             pollfds[counter] = node->pollfd;
@@ -478,6 +555,12 @@  persist_poll_block(struct poll_loop *loop)
 
     retval = time_poll(pollfds, hmap_count(&loop->poll_nodes), wevents,
                        loop->timeout_when, &elapsed);
+#else
+    retval = time_epoll_wait(loop->epoll_fd,
+        (struct epoll_event *) &loop->epoll_events, MAX_EPOLL_EVENTS, loop->timeout_when, &elapsed);
+    counter = retval;
+#endif
+
     if (retval < 0) {
         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
         VLOG_ERR_RL(&rl, "poll: %s", ovs_strerror(-retval));
@@ -485,7 +568,13 @@  persist_poll_block(struct poll_loop *loop)
         log_wakeup(loop->timeout_where, NULL, elapsed);
     } else {
         for (i = 0; i < counter; i++) {
+#ifdef USE_EPOLL
+            node = loop->epoll_events[i].data.ptr;
+            pollfds[i] = node->pollfd;
+            pollfds[i].revents = epoll_to_poll_events(loop->epoll_events[i].events);
+#else
             node = find_poll_node(loop, pollfds[i].fd, 0);
+#endif
             if (!node) {
                 VLOG_FATAL("poll: persistence state corrupted, no hash entry for %d", pollfds[i].fd);
             }
@@ -546,12 +635,19 @@  free_poll_loop(void *loop_)
     free_poll_nodes(loop);
     hmap_destroy(&loop->poll_nodes);
     free(loop);
+#ifdef USE_EPOLL
+    if (loop->persist) {
+        close(loop->epoll_fd);
+    }
+#endif
 }
 
 void poll_enable_persist(void) {
     struct poll_loop *loop = poll_loop();
-
     loop->persist = true;
+#ifdef USE_EPOLL
+    loop->epoll_fd = epoll_create(MAX_EPOLL_EVENTS);
+#endif
 }
 
 static struct poll_loop *
@@ -573,6 +669,9 @@  poll_loop(void)
         hmap_init(&loop->poll_nodes);
         xpthread_setspecific(key, loop);
         loop->persist = false;
+#ifdef USE_EPOLL
+        loop->epoll_fd = -1;
+#endif
     }
     return loop;
 }
diff --git a/lib/timeval.c b/lib/timeval.c
index 193c7bab1..6b1f1cf5a 100644
--- a/lib/timeval.c
+++ b/lib/timeval.c
@@ -38,6 +38,9 @@ 
 #include "unixctl.h"
 #include "util.h"
 #include "openvswitch/vlog.h"
+#ifdef __linux__
+#include <sys/epoll.h>
+#endif
 
 VLOG_DEFINE_THIS_MODULE(timeval);
 
@@ -270,6 +273,89 @@  time_alarm(unsigned int secs)
     deadline = now < LLONG_MAX - msecs ? now + msecs : LLONG_MAX;
 }
 
+#ifdef __linux__
+
+/* Like epoll_wait(), except:
+ *
+ *      - The timeout is specified as an absolute time, as defined by
+ *        time_msec(), instead of a duration.
+ *
+ *      - On error, returns a negative error code (instead of setting errno).
+ *
+ *      - If interrupted by a signal, retries automatically until the original
+ *        timeout is reached.  (Because of this property, this function will
+ *        never return -EINTR.)
+ *
+ * Stores the number of milliseconds elapsed during poll in '*elapsed'. */
+int
+time_epoll_wait(int epoll_fd, struct epoll_event *events, int max,
+          long long int timeout_when, int *elapsed)
+{
+    long long int *last_wakeup = last_wakeup_get();
+    long long int start;
+    bool quiescent;
+    int retval = 0;
+
+    time_init();
+    coverage_clear();
+    coverage_run();
+    if (*last_wakeup && !thread_is_pmd()) {
+        log_poll_interval(*last_wakeup);
+    }
+    start = time_msec();
+
+    timeout_when = MIN(timeout_when, deadline);
+    quiescent = ovsrcu_is_quiescent();
+
+    for (;;) {
+        long long int now = time_msec();
+        int time_left;
+
+        if (now >= timeout_when) {
+            time_left = 0;
+        } else if ((unsigned long long int) timeout_when - now > INT_MAX) {
+            time_left = INT_MAX;
+        } else {
+            time_left = timeout_when - now;
+        }
+
+        if (!quiescent) {
+            if (!time_left) {
+                ovsrcu_quiesce();
+            } else {
+                ovsrcu_quiesce_start();
+            }
+        }
+
+        retval = epoll_wait(epoll_fd, events, max, time_left);
+        if (retval < 0) {
+            retval = -errno;
+        }
+
+        if (!quiescent && time_left) {
+            ovsrcu_quiesce_end();
+        }
+
+        if (deadline <= time_msec()) {
+            fatal_signal_handler(SIGALRM);
+            if (retval < 0) {
+                retval = 0;
+            }
+            break;
+        }
+
+        if (retval != -EINTR) {
+            break;
+        }
+    }
+    *last_wakeup = time_msec();
+    refresh_rusage();
+    *elapsed = *last_wakeup - start;
+    return retval;
+}
+#endif
+
+
 /* Like poll(), except:
  *
  *      - The timeout is specified as an absolute time, as defined by
diff --git a/lib/timeval.h b/lib/timeval.h
index 502f703d4..d640eab17 100644
--- a/lib/timeval.h
+++ b/lib/timeval.h
@@ -20,6 +20,9 @@ 
 #include <time.h>
 #include "openvswitch/type-props.h"
 #include "util.h"
+#ifdef __linux__
+#include <sys/epoll.h>
+#endif
 
 #ifdef  __cplusplus
 extern "C" {
@@ -59,6 +62,10 @@  long long int time_wall_usec(void);
 void time_timespec(struct timespec *);
 void time_wall_timespec(struct timespec *);
 void time_alarm(unsigned int secs);
+#ifdef __linux__
+int time_epoll_wait(int epoll_fd, struct epoll_event *events, int max,
+          long long int timeout_when, int *elapsed);
+#endif
 int time_poll(struct pollfd *, int n_pollfds, HANDLE *handles,
               long long int timeout_when, int *elapsed);