Message ID | 4E81D609.1060203@siemens.com |
---|---|
State | New |
Headers | show |
On 09/27/2011 08:56 AM, Jan Kiszka wrote: > Move qemu_eventfd unmodified to oslib-posix and use it for signaling > POSIX AIO completions. If native eventfd suport is available, this > avoids multiple read accesses to drain multiple pending signals. As > before we use a pipe if eventfd is not supported. > > Signed-off-by: Jan Kiszka<jan.kiszka@siemens.com> > --- > os-posix.c | 32 -------------------------------- > oslib-posix.c | 32 +++++++++++++++++++++++++++++++- > posix-aio-compat.c | 12 ++++++++---- > 3 files changed, 39 insertions(+), 37 deletions(-) > > diff --git a/os-posix.c b/os-posix.c > index dbf3b24..a918895 100644 > --- a/os-posix.c > +++ b/os-posix.c > @@ -45,10 +45,6 @@ > #include<sys/syscall.h> > #endif > > -#ifdef CONFIG_EVENTFD > -#include<sys/eventfd.h> > -#endif > - > static struct passwd *user_pwd; > static const char *chroot_dir; > static int daemonize; > @@ -333,34 +329,6 @@ void os_set_line_buffering(void) > setvbuf(stdout, NULL, _IOLBF, 0); > } > > -/* > - * Creates an eventfd that looks like a pipe and has EFD_CLOEXEC set. > - */ > -int qemu_eventfd(int fds[2]) > -{ > -#ifdef CONFIG_EVENTFD > - int ret; > - > - ret = eventfd(0, 0); > - if (ret>= 0) { > - fds[0] = ret; > - qemu_set_cloexec(ret); > - if ((fds[1] = dup(ret)) == -1) { > - close(ret); > - return -1; > - } > - qemu_set_cloexec(fds[1]); > - return 0; > - } > - > - if (errno != ENOSYS) { > - return -1; > - } > -#endif > - > - return qemu_pipe(fds); > -} > - > int qemu_create_pidfile(const char *filename) > { > char buffer[128]; > diff --git a/oslib-posix.c b/oslib-posix.c > index a304fb0..8ef7bd7 100644 > --- a/oslib-posix.c > +++ b/oslib-posix.c > @@ -47,7 +47,9 @@ extern int daemon(int, int); > #include "trace.h" > #include "qemu_socket.h" > > - > +#ifdef CONFIG_EVENTFD > +#include<sys/eventfd.h> > +#endif > > int qemu_daemon(int nochdir, int noclose) > { > @@ -139,6 +141,34 @@ int qemu_pipe(int pipefd[2]) > return ret; > } > > +/* > + * Creates an eventfd that looks like a pipe and has EFD_CLOEXEC set. > + */ > +int qemu_eventfd(int fds[2]) > +{ > +#ifdef CONFIG_EVENTFD > + int ret; > + > + ret = eventfd(0, 0); > + if (ret>= 0) { > + fds[0] = ret; > + qemu_set_cloexec(ret); > + if ((fds[1] = dup(ret)) == -1) { > + close(ret); > + return -1; > + } > + qemu_set_cloexec(fds[1]); > + return 0; > + } > + > + if (errno != ENOSYS) { > + return -1; > + } > +#endif > + > + return qemu_pipe(fds); > +} > + I think it's a bit dangerous to implement eventfd() in terms of pipe(). You don't expect to handle EAGAIN with eventfd() whereas you have to handle it with pipe(). Moreover, the eventfd() counter is not lossy (practically speaking) whereas if you use pipe() as a counter, it will be lossy in practice. This is why posix aio uses pipe() and not eventfd(). Regards, Anthony Liguori > int qemu_utimensat(int dirfd, const char *path, const struct timespec *times, > int flags) > { > diff --git a/posix-aio-compat.c b/posix-aio-compat.c > index d3c1174..2aa5ba3 100644 > --- a/posix-aio-compat.c > +++ b/posix-aio-compat.c > @@ -521,7 +521,7 @@ static void posix_aio_read(void *opaque) > PosixAioState *s = opaque; > ssize_t len; > > - /* read all bytes from signal pipe */ > + /* read all bytes from eventfd or signal pipe */ > for (;;) { > char bytes[16]; > > @@ -546,10 +546,14 @@ static PosixAioState *posix_aio_state; > > static void posix_aio_notify_event(void) > { > - char byte = 0; > + /* Write 8 bytes to be compatible with eventfd. */ > + static const uint64_t val = 1; > ssize_t ret; > > - ret = write(posix_aio_state->wfd,&byte, sizeof(byte)); > + do { > + ret = write(posix_aio_state->wfd,&val, sizeof(val)); > + } while (ret< 0&& errno == EINTR); > + > if (ret< 0&& errno != EAGAIN) > die("write()"); > } > @@ -665,7 +669,7 @@ int paio_init(void) > s = g_malloc(sizeof(PosixAioState)); > > s->first_aio = NULL; > - if (qemu_pipe(fds) == -1) { > + if (qemu_eventfd(fds) == -1) { > fprintf(stderr, "failed to create pipe\n"); > return -1; > }
On 09/27/2011 05:07 PM, Anthony Liguori wrote: > > You don't expect to handle EAGAIN with eventfd() whereas you have to > handle it with pipe(). > > Moreover, the eventfd() counter is not lossy (practically speaking) > whereas if you use pipe() as a counter, it will be lossy in practice. > > This is why posix aio uses pipe() and not eventfd(). We could define a qemu_event mechanism that satisfies the least common denominator, and is implemented by eventfd when available. qemu_event_create() qemu_event_signal() qemu_event_wait() qemu_event_poll_add() // registers in main loop qemu_event_poll_del()
On 09/27/2011 09:11 AM, Avi Kivity wrote: > On 09/27/2011 05:07 PM, Anthony Liguori wrote: >> >> You don't expect to handle EAGAIN with eventfd() whereas you have to handle it >> with pipe(). >> >> Moreover, the eventfd() counter is not lossy (practically speaking) whereas if >> you use pipe() as a counter, it will be lossy in practice. >> >> This is why posix aio uses pipe() and not eventfd(). > > We could define a qemu_event mechanism that satisfies the least common > denominator, and is implemented by eventfd when available. > > qemu_event_create() > qemu_event_signal() > qemu_event_wait() > qemu_event_poll_add() // registers in main loop > qemu_event_poll_del() See hw/event_notifier.[ch]. Regards, Anthony Liguori >
On 09/27/2011 05:19 PM, Anthony Liguori wrote: >> qemu_event_create() >> qemu_event_signal() >> qemu_event_wait() >> qemu_event_poll_add() // registers in main loop >> qemu_event_poll_del() > > > See hw/event_notifier.[ch]. Precisely.
On 2011-09-27 16:19, Anthony Liguori wrote: > On 09/27/2011 09:11 AM, Avi Kivity wrote: >> On 09/27/2011 05:07 PM, Anthony Liguori wrote: >>> >>> You don't expect to handle EAGAIN with eventfd() whereas you have to handle it >>> with pipe(). >>> >>> Moreover, the eventfd() counter is not lossy (practically speaking) whereas if >>> you use pipe() as a counter, it will be lossy in practice. >>> >>> This is why posix aio uses pipe() and not eventfd(). >> >> We could define a qemu_event mechanism that satisfies the least common >> denominator, and is implemented by eventfd when available. >> >> qemu_event_create() >> qemu_event_signal() >> qemu_event_wait() >> qemu_event_poll_add() // registers in main loop >> qemu_event_poll_del() > > See hw/event_notifier.[ch]. That code looks suspicious, btw. It claims things ("we use EFD_SEMAPHORE") it does not do. Jan
On 2011-09-27 16:07, Anthony Liguori wrote: > On 09/27/2011 08:56 AM, Jan Kiszka wrote: >> Move qemu_eventfd unmodified to oslib-posix and use it for signaling >> POSIX AIO completions. If native eventfd suport is available, this >> avoids multiple read accesses to drain multiple pending signals. As >> before we use a pipe if eventfd is not supported. >> >> Signed-off-by: Jan Kiszka<jan.kiszka@siemens.com> >> --- >> os-posix.c | 32 -------------------------------- >> oslib-posix.c | 32 +++++++++++++++++++++++++++++++- >> posix-aio-compat.c | 12 ++++++++---- >> 3 files changed, 39 insertions(+), 37 deletions(-) >> >> diff --git a/os-posix.c b/os-posix.c >> index dbf3b24..a918895 100644 >> --- a/os-posix.c >> +++ b/os-posix.c >> @@ -45,10 +45,6 @@ >> #include<sys/syscall.h> >> #endif >> >> -#ifdef CONFIG_EVENTFD >> -#include<sys/eventfd.h> >> -#endif >> - >> static struct passwd *user_pwd; >> static const char *chroot_dir; >> static int daemonize; >> @@ -333,34 +329,6 @@ void os_set_line_buffering(void) >> setvbuf(stdout, NULL, _IOLBF, 0); >> } >> >> -/* >> - * Creates an eventfd that looks like a pipe and has EFD_CLOEXEC set. >> - */ >> -int qemu_eventfd(int fds[2]) >> -{ >> -#ifdef CONFIG_EVENTFD >> - int ret; >> - >> - ret = eventfd(0, 0); >> - if (ret>= 0) { >> - fds[0] = ret; >> - qemu_set_cloexec(ret); >> - if ((fds[1] = dup(ret)) == -1) { >> - close(ret); >> - return -1; >> - } >> - qemu_set_cloexec(fds[1]); >> - return 0; >> - } >> - >> - if (errno != ENOSYS) { >> - return -1; >> - } >> -#endif >> - >> - return qemu_pipe(fds); >> -} >> - >> int qemu_create_pidfile(const char *filename) >> { >> char buffer[128]; >> diff --git a/oslib-posix.c b/oslib-posix.c >> index a304fb0..8ef7bd7 100644 >> --- a/oslib-posix.c >> +++ b/oslib-posix.c >> @@ -47,7 +47,9 @@ extern int daemon(int, int); >> #include "trace.h" >> #include "qemu_socket.h" >> >> - >> +#ifdef CONFIG_EVENTFD >> +#include<sys/eventfd.h> >> +#endif >> >> int qemu_daemon(int nochdir, int noclose) >> { >> @@ -139,6 +141,34 @@ int qemu_pipe(int pipefd[2]) >> return ret; >> } >> >> +/* >> + * Creates an eventfd that looks like a pipe and has EFD_CLOEXEC set. >> + */ >> +int qemu_eventfd(int fds[2]) >> +{ >> +#ifdef CONFIG_EVENTFD >> + int ret; >> + >> + ret = eventfd(0, 0); >> + if (ret>= 0) { >> + fds[0] = ret; >> + qemu_set_cloexec(ret); >> + if ((fds[1] = dup(ret)) == -1) { >> + close(ret); >> + return -1; >> + } >> + qemu_set_cloexec(fds[1]); >> + return 0; >> + } >> + >> + if (errno != ENOSYS) { >> + return -1; >> + } >> +#endif >> + >> + return qemu_pipe(fds); >> +} >> + > > I think it's a bit dangerous to implement eventfd() in terms of pipe(). > > You don't expect to handle EAGAIN with eventfd() whereas you have to handle it > with pipe(). EAGAIN is returned on eventfd read if no event is pending and the fd is non-blocking - just as we configure it. > > Moreover, the eventfd() counter is not lossy (practically speaking) whereas if > you use pipe() as a counter, it will be lossy in practice. > > This is why posix aio uses pipe() and not eventfd(). I don't get this yet. eventfd is lossy by default. It only decreases the counter on read if you specify EFD_SEMAPHORE - which we do not do. Jan
On 09/27/2011 05:29 PM, Jan Kiszka wrote: > > > > Moreover, the eventfd() counter is not lossy (practically speaking) whereas if > > you use pipe() as a counter, it will be lossy in practice. > > > > This is why posix aio uses pipe() and not eventfd(). > > I don't get this yet. eventfd is lossy by default. It only decreases the > counter on read if you specify EFD_SEMAPHORE - which we do not do. > It's not lossy - a read returns the number of events written since the last read.
On 09/27/2011 09:29 AM, Jan Kiszka wrote: > On 2011-09-27 16:07, Anthony Liguori wrote: >> On 09/27/2011 08:56 AM, Jan Kiszka wrote: >>> Move qemu_eventfd unmodified to oslib-posix and use it for signaling >>> POSIX AIO completions. If native eventfd suport is available, this >>> avoids multiple read accesses to drain multiple pending signals. As >>> before we use a pipe if eventfd is not supported. >>> >>> Signed-off-by: Jan Kiszka<jan.kiszka@siemens.com> >>> --- >>> os-posix.c | 32 -------------------------------- >>> oslib-posix.c | 32 +++++++++++++++++++++++++++++++- >>> posix-aio-compat.c | 12 ++++++++---- >>> 3 files changed, 39 insertions(+), 37 deletions(-) >>> >>> diff --git a/os-posix.c b/os-posix.c >>> index dbf3b24..a918895 100644 >>> --- a/os-posix.c >>> +++ b/os-posix.c >>> @@ -45,10 +45,6 @@ >>> #include<sys/syscall.h> >>> #endif >>> >>> -#ifdef CONFIG_EVENTFD >>> -#include<sys/eventfd.h> >>> -#endif >>> - >>> static struct passwd *user_pwd; >>> static const char *chroot_dir; >>> static int daemonize; >>> @@ -333,34 +329,6 @@ void os_set_line_buffering(void) >>> setvbuf(stdout, NULL, _IOLBF, 0); >>> } >>> >>> -/* >>> - * Creates an eventfd that looks like a pipe and has EFD_CLOEXEC set. >>> - */ >>> -int qemu_eventfd(int fds[2]) >>> -{ >>> -#ifdef CONFIG_EVENTFD >>> - int ret; >>> - >>> - ret = eventfd(0, 0); >>> - if (ret>= 0) { >>> - fds[0] = ret; >>> - qemu_set_cloexec(ret); >>> - if ((fds[1] = dup(ret)) == -1) { >>> - close(ret); >>> - return -1; >>> - } >>> - qemu_set_cloexec(fds[1]); >>> - return 0; >>> - } >>> - >>> - if (errno != ENOSYS) { >>> - return -1; >>> - } >>> -#endif >>> - >>> - return qemu_pipe(fds); >>> -} >>> - >>> int qemu_create_pidfile(const char *filename) >>> { >>> char buffer[128]; >>> diff --git a/oslib-posix.c b/oslib-posix.c >>> index a304fb0..8ef7bd7 100644 >>> --- a/oslib-posix.c >>> +++ b/oslib-posix.c >>> @@ -47,7 +47,9 @@ extern int daemon(int, int); >>> #include "trace.h" >>> #include "qemu_socket.h" >>> >>> - >>> +#ifdef CONFIG_EVENTFD >>> +#include<sys/eventfd.h> >>> +#endif >>> >>> int qemu_daemon(int nochdir, int noclose) >>> { >>> @@ -139,6 +141,34 @@ int qemu_pipe(int pipefd[2]) >>> return ret; >>> } >>> >>> +/* >>> + * Creates an eventfd that looks like a pipe and has EFD_CLOEXEC set. >>> + */ >>> +int qemu_eventfd(int fds[2]) >>> +{ >>> +#ifdef CONFIG_EVENTFD >>> + int ret; >>> + >>> + ret = eventfd(0, 0); >>> + if (ret>= 0) { >>> + fds[0] = ret; >>> + qemu_set_cloexec(ret); >>> + if ((fds[1] = dup(ret)) == -1) { >>> + close(ret); >>> + return -1; >>> + } >>> + qemu_set_cloexec(fds[1]); >>> + return 0; >>> + } >>> + >>> + if (errno != ENOSYS) { >>> + return -1; >>> + } >>> +#endif >>> + >>> + return qemu_pipe(fds); >>> +} >>> + >> >> I think it's a bit dangerous to implement eventfd() in terms of pipe(). >> >> You don't expect to handle EAGAIN with eventfd() whereas you have to handle it >> with pipe(). > > EAGAIN is returned on eventfd read if no event is pending and the fd is > non-blocking - just as we configure it. > >> >> Moreover, the eventfd() counter is not lossy (practically speaking) whereas if >> you use pipe() as a counter, it will be lossy in practice. >> >> This is why posix aio uses pipe() and not eventfd(). > > I don't get this yet. eventfd is lossy by default. It only decreases the > counter on read if you specify EFD_SEMAPHORE - which we do not do. uint64_t value; for (i = 0; i < 1 << 32; i++) { value = 1; write(fd, &value, sizeof(value)); } uint64_t count = 0; do { len = read(fd, &value, sizeof(value)); count += value; } while (len != -1); With eventfd, count == 2^32. With pipe, count == 8192. That's each '1' is stored in the pipe buffer whereas with eventfd, an index is just incremented. Not sure what you mean re: EFD_SEMAPHORE. EFD_SEMAPHORE basically means any non-zero value is returned as 1 and the counter is decremented by 1. Without EFD_SEMAPHORE, the count is returned and the counter is reset to 0. Regards, Anthony Liguori > > Jan >
On 2011-09-27 16:34, Avi Kivity wrote: > On 09/27/2011 05:29 PM, Jan Kiszka wrote: >>> >>> Moreover, the eventfd() counter is not lossy (practically speaking) whereas if >>> you use pipe() as a counter, it will be lossy in practice. >>> >>> This is why posix aio uses pipe() and not eventfd(). >> >> I don't get this yet. eventfd is lossy by default. It only decreases the >> counter on read if you specify EFD_SEMAPHORE - which we do not do. >> > > It's not lossy - a read returns the number of events written since the > last read. Yeah, but what's the point? We don't evaluate this. Jan
On 09/27/2011 09:22 AM, Jan Kiszka wrote: > On 2011-09-27 16:19, Anthony Liguori wrote: >> On 09/27/2011 09:11 AM, Avi Kivity wrote: >>> On 09/27/2011 05:07 PM, Anthony Liguori wrote: >>>> >>>> You don't expect to handle EAGAIN with eventfd() whereas you have to handle it >>>> with pipe(). >>>> >>>> Moreover, the eventfd() counter is not lossy (practically speaking) whereas if >>>> you use pipe() as a counter, it will be lossy in practice. >>>> >>>> This is why posix aio uses pipe() and not eventfd(). >>> >>> We could define a qemu_event mechanism that satisfies the least common >>> denominator, and is implemented by eventfd when available. >>> >>> qemu_event_create() >>> qemu_event_signal() >>> qemu_event_wait() >>> qemu_event_poll_add() // registers in main loop >>> qemu_event_poll_del() >> >> See hw/event_notifier.[ch]. > > That code looks suspicious, btw. It claims things ("we use > EFD_SEMAPHORE") it does not do. Indeed. But the interface appears to be the right one IMHO. Regards, Anthony Liguori > > Jan >
On 09/27/2011 04:07 PM, Anthony Liguori wrote: > > I think it's a bit dangerous to implement eventfd() in terms of pipe(). > > You don't expect to handle EAGAIN with eventfd() whereas you have to > handle it with pipe(). > > Moreover, the eventfd() counter is not lossy (practically speaking) > whereas if you use pipe() as a counter, it will be lossy in practice. > > This is why posix aio uses pipe() and not eventfd(). But this is the same idiom we use for the iothread signaling. We're not using the eventfd's counter. Perhaps it would be nice to complete EventNotifier with "notify event" methods and use it, but Jan's patch is safe, I think. Paolo
On 09/27/2011 05:36 PM, Jan Kiszka wrote: > On 2011-09-27 16:34, Avi Kivity wrote: > > On 09/27/2011 05:29 PM, Jan Kiszka wrote: > >>> > >>> Moreover, the eventfd() counter is not lossy (practically speaking) whereas if > >>> you use pipe() as a counter, it will be lossy in practice. > >>> > >>> This is why posix aio uses pipe() and not eventfd(). > >> > >> I don't get this yet. eventfd is lossy by default. It only decreases the > >> counter on read if you specify EFD_SEMAPHORE - which we do not do. > >> > > > > It's not lossy - a read returns the number of events written since the > > last read. > > Yeah, but what's the point? We don't evaluate this. > > If we write an interface that looks like eventfd but subtly differs, someone will get bitten. If we write a new interface and implement it via eventfd (or a pipe), no one gets bitten.
On 2011-09-27 16:42, Avi Kivity wrote: > On 09/27/2011 05:36 PM, Jan Kiszka wrote: >> On 2011-09-27 16:34, Avi Kivity wrote: >>> On 09/27/2011 05:29 PM, Jan Kiszka wrote: >>>>> >>>>> Moreover, the eventfd() counter is not lossy (practically speaking) whereas if >>>>> you use pipe() as a counter, it will be lossy in practice. >>>>> >>>>> This is why posix aio uses pipe() and not eventfd(). >>>> >>>> I don't get this yet. eventfd is lossy by default. It only decreases the >>>> counter on read if you specify EFD_SEMAPHORE - which we do not do. >>>> >>> >>> It's not lossy - a read returns the number of events written since the >>> last read. >> >> Yeah, but what's the point? We don't evaluate this. >> >> > > If we write an interface that looks like eventfd but subtly differs, > someone will get bitten. If we write a new interface and implement it > via eventfd (or a pipe), no one gets bitten. I don't disagree that there is still room for improving the existing interface, generalizing to qemu_event. But that's not in the scope of this patch. Jan
On 09/27/2011 05:45 PM, Jan Kiszka wrote: > I don't disagree that there is still room for improving the existing > interface, generalizing to qemu_event. But that's not in the scope of > this patch. > Why not use event_notify.c?
On 2011-09-27 16:48, Avi Kivity wrote: > On 09/27/2011 05:45 PM, Jan Kiszka wrote: >> I don't disagree that there is still room for improving the existing >> interface, generalizing to qemu_event. But that's not in the scope of >> this patch. >> > > Why not use event_notify.c? It doesn't solve the wrapping issue, it mandates eventfd support. Jan
On 09/27/2011 05:50 PM, Jan Kiszka wrote: > On 2011-09-27 16:48, Avi Kivity wrote: > > On 09/27/2011 05:45 PM, Jan Kiszka wrote: > >> I don't disagree that there is still room for improving the existing > >> interface, generalizing to qemu_event. But that's not in the scope of > >> this patch. > >> > > > > Why not use event_notify.c? > > It doesn't solve the wrapping issue, it mandates eventfd support. > We can add pipe fallbacks too (though if it was meant to use with vhost, that's not what we want).
On 09/27/2011 09:54 AM, Avi Kivity wrote: > On 09/27/2011 05:50 PM, Jan Kiszka wrote: >> On 2011-09-27 16:48, Avi Kivity wrote: >> > On 09/27/2011 05:45 PM, Jan Kiszka wrote: >> >> I don't disagree that there is still room for improving the existing >> >> interface, generalizing to qemu_event. But that's not in the scope of >> >> this patch. >> >> >> > >> > Why not use event_notify.c? >> >> It doesn't solve the wrapping issue, it mandates eventfd support. >> > > We can add pipe fallbacks too (though if it was meant to use with vhost, that's > not what we want). vhost cannot exist on a kernel that doesn't support eventfd so I don't think we need to worry about it. Regards, Anthony Liguori >
On 2011-09-27 16:54, Avi Kivity wrote: > On 09/27/2011 05:50 PM, Jan Kiszka wrote: >> On 2011-09-27 16:48, Avi Kivity wrote: >>> On 09/27/2011 05:45 PM, Jan Kiszka wrote: >>>> I don't disagree that there is still room for improving the existing >>>> interface, generalizing to qemu_event. But that's not in the scope of >>>> this patch. >>>> >>> >>> Why not use event_notify.c? >> >> It doesn't solve the wrapping issue, it mandates eventfd support. >> > > We can add pipe fallbacks too (though if it was meant to use with vhost, > that's not what we want). Not a practical issue due to the dependency on much more recent vhost. So EventNotifier will have to be migrated over a generic solution as well. Again, that's food for additional patches. Jan
diff --git a/os-posix.c b/os-posix.c index dbf3b24..a918895 100644 --- a/os-posix.c +++ b/os-posix.c @@ -45,10 +45,6 @@ #include <sys/syscall.h> #endif -#ifdef CONFIG_EVENTFD -#include <sys/eventfd.h> -#endif - static struct passwd *user_pwd; static const char *chroot_dir; static int daemonize; @@ -333,34 +329,6 @@ void os_set_line_buffering(void) setvbuf(stdout, NULL, _IOLBF, 0); } -/* - * Creates an eventfd that looks like a pipe and has EFD_CLOEXEC set. - */ -int qemu_eventfd(int fds[2]) -{ -#ifdef CONFIG_EVENTFD - int ret; - - ret = eventfd(0, 0); - if (ret >= 0) { - fds[0] = ret; - qemu_set_cloexec(ret); - if ((fds[1] = dup(ret)) == -1) { - close(ret); - return -1; - } - qemu_set_cloexec(fds[1]); - return 0; - } - - if (errno != ENOSYS) { - return -1; - } -#endif - - return qemu_pipe(fds); -} - int qemu_create_pidfile(const char *filename) { char buffer[128]; diff --git a/oslib-posix.c b/oslib-posix.c index a304fb0..8ef7bd7 100644 --- a/oslib-posix.c +++ b/oslib-posix.c @@ -47,7 +47,9 @@ extern int daemon(int, int); #include "trace.h" #include "qemu_socket.h" - +#ifdef CONFIG_EVENTFD +#include <sys/eventfd.h> +#endif int qemu_daemon(int nochdir, int noclose) { @@ -139,6 +141,34 @@ int qemu_pipe(int pipefd[2]) return ret; } +/* + * Creates an eventfd that looks like a pipe and has EFD_CLOEXEC set. + */ +int qemu_eventfd(int fds[2]) +{ +#ifdef CONFIG_EVENTFD + int ret; + + ret = eventfd(0, 0); + if (ret >= 0) { + fds[0] = ret; + qemu_set_cloexec(ret); + if ((fds[1] = dup(ret)) == -1) { + close(ret); + return -1; + } + qemu_set_cloexec(fds[1]); + return 0; + } + + if (errno != ENOSYS) { + return -1; + } +#endif + + return qemu_pipe(fds); +} + int qemu_utimensat(int dirfd, const char *path, const struct timespec *times, int flags) { diff --git a/posix-aio-compat.c b/posix-aio-compat.c index d3c1174..2aa5ba3 100644 --- a/posix-aio-compat.c +++ b/posix-aio-compat.c @@ -521,7 +521,7 @@ static void posix_aio_read(void *opaque) PosixAioState *s = opaque; ssize_t len; - /* read all bytes from signal pipe */ + /* read all bytes from eventfd or signal pipe */ for (;;) { char bytes[16]; @@ -546,10 +546,14 @@ static PosixAioState *posix_aio_state; static void posix_aio_notify_event(void) { - char byte = 0; + /* Write 8 bytes to be compatible with eventfd. */ + static const uint64_t val = 1; ssize_t ret; - ret = write(posix_aio_state->wfd, &byte, sizeof(byte)); + do { + ret = write(posix_aio_state->wfd, &val, sizeof(val)); + } while (ret < 0 && errno == EINTR); + if (ret < 0 && errno != EAGAIN) die("write()"); } @@ -665,7 +669,7 @@ int paio_init(void) s = g_malloc(sizeof(PosixAioState)); s->first_aio = NULL; - if (qemu_pipe(fds) == -1) { + if (qemu_eventfd(fds) == -1) { fprintf(stderr, "failed to create pipe\n"); return -1; }
Move qemu_eventfd unmodified to oslib-posix and use it for signaling POSIX AIO completions. If native eventfd suport is available, this avoids multiple read accesses to drain multiple pending signals. As before we use a pipe if eventfd is not supported. Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> --- os-posix.c | 32 -------------------------------- oslib-posix.c | 32 +++++++++++++++++++++++++++++++- posix-aio-compat.c | 12 ++++++++---- 3 files changed, 39 insertions(+), 37 deletions(-)