diff mbox

Use qemu_eventfd for POSIX AIO

Message ID 4E81D609.1060203@siemens.com
State New
Headers show

Commit Message

Jan Kiszka Sept. 27, 2011, 1:56 p.m. UTC
Move qemu_eventfd unmodified to oslib-posix and use it for signaling
POSIX AIO completions. If native eventfd suport is available, this
avoids multiple read accesses to drain multiple pending signals. As
before we use a pipe if eventfd is not supported.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
 os-posix.c         |   32 --------------------------------
 oslib-posix.c      |   32 +++++++++++++++++++++++++++++++-
 posix-aio-compat.c |   12 ++++++++----
 3 files changed, 39 insertions(+), 37 deletions(-)

Comments

Anthony Liguori Sept. 27, 2011, 2:07 p.m. UTC | #1
On 09/27/2011 08:56 AM, Jan Kiszka wrote:
> Move qemu_eventfd unmodified to oslib-posix and use it for signaling
> POSIX AIO completions. If native eventfd suport is available, this
> avoids multiple read accesses to drain multiple pending signals. As
> before we use a pipe if eventfd is not supported.
>
> Signed-off-by: Jan Kiszka<jan.kiszka@siemens.com>
> ---
>   os-posix.c         |   32 --------------------------------
>   oslib-posix.c      |   32 +++++++++++++++++++++++++++++++-
>   posix-aio-compat.c |   12 ++++++++----
>   3 files changed, 39 insertions(+), 37 deletions(-)
>
> diff --git a/os-posix.c b/os-posix.c
> index dbf3b24..a918895 100644
> --- a/os-posix.c
> +++ b/os-posix.c
> @@ -45,10 +45,6 @@
>   #include<sys/syscall.h>
>   #endif
>
> -#ifdef CONFIG_EVENTFD
> -#include<sys/eventfd.h>
> -#endif
> -
>   static struct passwd *user_pwd;
>   static const char *chroot_dir;
>   static int daemonize;
> @@ -333,34 +329,6 @@ void os_set_line_buffering(void)
>       setvbuf(stdout, NULL, _IOLBF, 0);
>   }
>
> -/*
> - * Creates an eventfd that looks like a pipe and has EFD_CLOEXEC set.
> - */
> -int qemu_eventfd(int fds[2])
> -{
> -#ifdef CONFIG_EVENTFD
> -    int ret;
> -
> -    ret = eventfd(0, 0);
> -    if (ret>= 0) {
> -        fds[0] = ret;
> -        qemu_set_cloexec(ret);
> -        if ((fds[1] = dup(ret)) == -1) {
> -            close(ret);
> -            return -1;
> -        }
> -        qemu_set_cloexec(fds[1]);
> -        return 0;
> -    }
> -
> -    if (errno != ENOSYS) {
> -        return -1;
> -    }
> -#endif
> -
> -    return qemu_pipe(fds);
> -}
> -
>   int qemu_create_pidfile(const char *filename)
>   {
>       char buffer[128];
> diff --git a/oslib-posix.c b/oslib-posix.c
> index a304fb0..8ef7bd7 100644
> --- a/oslib-posix.c
> +++ b/oslib-posix.c
> @@ -47,7 +47,9 @@ extern int daemon(int, int);
>   #include "trace.h"
>   #include "qemu_socket.h"
>
> -
> +#ifdef CONFIG_EVENTFD
> +#include<sys/eventfd.h>
> +#endif
>
>   int qemu_daemon(int nochdir, int noclose)
>   {
> @@ -139,6 +141,34 @@ int qemu_pipe(int pipefd[2])
>       return ret;
>   }
>
> +/*
> + * Creates an eventfd that looks like a pipe and has EFD_CLOEXEC set.
> + */
> +int qemu_eventfd(int fds[2])
> +{
> +#ifdef CONFIG_EVENTFD
> +    int ret;
> +
> +    ret = eventfd(0, 0);
> +    if (ret>= 0) {
> +        fds[0] = ret;
> +        qemu_set_cloexec(ret);
> +        if ((fds[1] = dup(ret)) == -1) {
> +            close(ret);
> +            return -1;
> +        }
> +        qemu_set_cloexec(fds[1]);
> +        return 0;
> +    }
> +
> +    if (errno != ENOSYS) {
> +        return -1;
> +    }
> +#endif
> +
> +    return qemu_pipe(fds);
> +}
> +

I think it's a bit dangerous to implement eventfd() in terms of pipe().

You don't expect to handle EAGAIN with eventfd() whereas you have to handle it 
with pipe().

Moreover, the eventfd() counter is not lossy (practically speaking) whereas if 
you use pipe() as a counter, it will be lossy in practice.

This is why posix aio uses pipe() and not eventfd().

Regards,

Anthony Liguori

>   int qemu_utimensat(int dirfd, const char *path, const struct timespec *times,
>                      int flags)
>   {
> diff --git a/posix-aio-compat.c b/posix-aio-compat.c
> index d3c1174..2aa5ba3 100644
> --- a/posix-aio-compat.c
> +++ b/posix-aio-compat.c
> @@ -521,7 +521,7 @@ static void posix_aio_read(void *opaque)
>       PosixAioState *s = opaque;
>       ssize_t len;
>
> -    /* read all bytes from signal pipe */
> +    /* read all bytes from eventfd or signal pipe */
>       for (;;) {
>           char bytes[16];
>
> @@ -546,10 +546,14 @@ static PosixAioState *posix_aio_state;
>
>   static void posix_aio_notify_event(void)
>   {
> -    char byte = 0;
> +    /* Write 8 bytes to be compatible with eventfd.  */
> +    static const uint64_t val = 1;
>       ssize_t ret;
>
> -    ret = write(posix_aio_state->wfd,&byte, sizeof(byte));
> +    do {
> +        ret = write(posix_aio_state->wfd,&val, sizeof(val));
> +    } while (ret<  0&&  errno == EINTR);
> +
>       if (ret<  0&&  errno != EAGAIN)
>           die("write()");
>   }
> @@ -665,7 +669,7 @@ int paio_init(void)
>       s = g_malloc(sizeof(PosixAioState));
>
>       s->first_aio = NULL;
> -    if (qemu_pipe(fds) == -1) {
> +    if (qemu_eventfd(fds) == -1) {
>           fprintf(stderr, "failed to create pipe\n");
>           return -1;
>       }
Avi Kivity Sept. 27, 2011, 2:11 p.m. UTC | #2
On 09/27/2011 05:07 PM, Anthony Liguori wrote:
>
> You don't expect to handle EAGAIN with eventfd() whereas you have to 
> handle it with pipe().
>
> Moreover, the eventfd() counter is not lossy (practically speaking) 
> whereas if you use pipe() as a counter, it will be lossy in practice.
>
> This is why posix aio uses pipe() and not eventfd().

We could define a qemu_event mechanism that satisfies the least common 
denominator, and is implemented by eventfd when available.

qemu_event_create()
qemu_event_signal()
qemu_event_wait()
qemu_event_poll_add() // registers in main loop
qemu_event_poll_del()
Anthony Liguori Sept. 27, 2011, 2:19 p.m. UTC | #3
On 09/27/2011 09:11 AM, Avi Kivity wrote:
> On 09/27/2011 05:07 PM, Anthony Liguori wrote:
>>
>> You don't expect to handle EAGAIN with eventfd() whereas you have to handle it
>> with pipe().
>>
>> Moreover, the eventfd() counter is not lossy (practically speaking) whereas if
>> you use pipe() as a counter, it will be lossy in practice.
>>
>> This is why posix aio uses pipe() and not eventfd().
>
> We could define a qemu_event mechanism that satisfies the least common
> denominator, and is implemented by eventfd when available.
>
> qemu_event_create()
> qemu_event_signal()
> qemu_event_wait()
> qemu_event_poll_add() // registers in main loop
> qemu_event_poll_del()

See hw/event_notifier.[ch].

Regards,

Anthony Liguori

>
Avi Kivity Sept. 27, 2011, 2:22 p.m. UTC | #4
On 09/27/2011 05:19 PM, Anthony Liguori wrote:
>> qemu_event_create()
>> qemu_event_signal()
>> qemu_event_wait()
>> qemu_event_poll_add() // registers in main loop
>> qemu_event_poll_del()
>
>
> See hw/event_notifier.[ch].

Precisely.
Jan Kiszka Sept. 27, 2011, 2:22 p.m. UTC | #5
On 2011-09-27 16:19, Anthony Liguori wrote:
> On 09/27/2011 09:11 AM, Avi Kivity wrote:
>> On 09/27/2011 05:07 PM, Anthony Liguori wrote:
>>>
>>> You don't expect to handle EAGAIN with eventfd() whereas you have to handle it
>>> with pipe().
>>>
>>> Moreover, the eventfd() counter is not lossy (practically speaking) whereas if
>>> you use pipe() as a counter, it will be lossy in practice.
>>>
>>> This is why posix aio uses pipe() and not eventfd().
>>
>> We could define a qemu_event mechanism that satisfies the least common
>> denominator, and is implemented by eventfd when available.
>>
>> qemu_event_create()
>> qemu_event_signal()
>> qemu_event_wait()
>> qemu_event_poll_add() // registers in main loop
>> qemu_event_poll_del()
> 
> See hw/event_notifier.[ch].

That code looks suspicious, btw. It claims things ("we use
EFD_SEMAPHORE") it does not do.

Jan
Jan Kiszka Sept. 27, 2011, 2:29 p.m. UTC | #6
On 2011-09-27 16:07, Anthony Liguori wrote:
> On 09/27/2011 08:56 AM, Jan Kiszka wrote:
>> Move qemu_eventfd unmodified to oslib-posix and use it for signaling
>> POSIX AIO completions. If native eventfd suport is available, this
>> avoids multiple read accesses to drain multiple pending signals. As
>> before we use a pipe if eventfd is not supported.
>>
>> Signed-off-by: Jan Kiszka<jan.kiszka@siemens.com>
>> ---
>>   os-posix.c         |   32 --------------------------------
>>   oslib-posix.c      |   32 +++++++++++++++++++++++++++++++-
>>   posix-aio-compat.c |   12 ++++++++----
>>   3 files changed, 39 insertions(+), 37 deletions(-)
>>
>> diff --git a/os-posix.c b/os-posix.c
>> index dbf3b24..a918895 100644
>> --- a/os-posix.c
>> +++ b/os-posix.c
>> @@ -45,10 +45,6 @@
>>   #include<sys/syscall.h>
>>   #endif
>>
>> -#ifdef CONFIG_EVENTFD
>> -#include<sys/eventfd.h>
>> -#endif
>> -
>>   static struct passwd *user_pwd;
>>   static const char *chroot_dir;
>>   static int daemonize;
>> @@ -333,34 +329,6 @@ void os_set_line_buffering(void)
>>       setvbuf(stdout, NULL, _IOLBF, 0);
>>   }
>>
>> -/*
>> - * Creates an eventfd that looks like a pipe and has EFD_CLOEXEC set.
>> - */
>> -int qemu_eventfd(int fds[2])
>> -{
>> -#ifdef CONFIG_EVENTFD
>> -    int ret;
>> -
>> -    ret = eventfd(0, 0);
>> -    if (ret>= 0) {
>> -        fds[0] = ret;
>> -        qemu_set_cloexec(ret);
>> -        if ((fds[1] = dup(ret)) == -1) {
>> -            close(ret);
>> -            return -1;
>> -        }
>> -        qemu_set_cloexec(fds[1]);
>> -        return 0;
>> -    }
>> -
>> -    if (errno != ENOSYS) {
>> -        return -1;
>> -    }
>> -#endif
>> -
>> -    return qemu_pipe(fds);
>> -}
>> -
>>   int qemu_create_pidfile(const char *filename)
>>   {
>>       char buffer[128];
>> diff --git a/oslib-posix.c b/oslib-posix.c
>> index a304fb0..8ef7bd7 100644
>> --- a/oslib-posix.c
>> +++ b/oslib-posix.c
>> @@ -47,7 +47,9 @@ extern int daemon(int, int);
>>   #include "trace.h"
>>   #include "qemu_socket.h"
>>
>> -
>> +#ifdef CONFIG_EVENTFD
>> +#include<sys/eventfd.h>
>> +#endif
>>
>>   int qemu_daemon(int nochdir, int noclose)
>>   {
>> @@ -139,6 +141,34 @@ int qemu_pipe(int pipefd[2])
>>       return ret;
>>   }
>>
>> +/*
>> + * Creates an eventfd that looks like a pipe and has EFD_CLOEXEC set.
>> + */
>> +int qemu_eventfd(int fds[2])
>> +{
>> +#ifdef CONFIG_EVENTFD
>> +    int ret;
>> +
>> +    ret = eventfd(0, 0);
>> +    if (ret>= 0) {
>> +        fds[0] = ret;
>> +        qemu_set_cloexec(ret);
>> +        if ((fds[1] = dup(ret)) == -1) {
>> +            close(ret);
>> +            return -1;
>> +        }
>> +        qemu_set_cloexec(fds[1]);
>> +        return 0;
>> +    }
>> +
>> +    if (errno != ENOSYS) {
>> +        return -1;
>> +    }
>> +#endif
>> +
>> +    return qemu_pipe(fds);
>> +}
>> +
> 
> I think it's a bit dangerous to implement eventfd() in terms of pipe().
> 
> You don't expect to handle EAGAIN with eventfd() whereas you have to handle it 
> with pipe().

EAGAIN is returned on eventfd read if no event is pending and the fd is
non-blocking - just as we configure it.

> 
> Moreover, the eventfd() counter is not lossy (practically speaking) whereas if 
> you use pipe() as a counter, it will be lossy in practice.
> 
> This is why posix aio uses pipe() and not eventfd().

I don't get this yet. eventfd is lossy by default. It only decreases the
counter on read if you specify EFD_SEMAPHORE - which we do not do.

Jan
Avi Kivity Sept. 27, 2011, 2:34 p.m. UTC | #7
On 09/27/2011 05:29 PM, Jan Kiszka wrote:
> >
> >  Moreover, the eventfd() counter is not lossy (practically speaking) whereas if
> >  you use pipe() as a counter, it will be lossy in practice.
> >
> >  This is why posix aio uses pipe() and not eventfd().
>
> I don't get this yet. eventfd is lossy by default. It only decreases the
> counter on read if you specify EFD_SEMAPHORE - which we do not do.
>

It's not lossy - a read returns the number of events written since the 
last read.
Anthony Liguori Sept. 27, 2011, 2:36 p.m. UTC | #8
On 09/27/2011 09:29 AM, Jan Kiszka wrote:
> On 2011-09-27 16:07, Anthony Liguori wrote:
>> On 09/27/2011 08:56 AM, Jan Kiszka wrote:
>>> Move qemu_eventfd unmodified to oslib-posix and use it for signaling
>>> POSIX AIO completions. If native eventfd suport is available, this
>>> avoids multiple read accesses to drain multiple pending signals. As
>>> before we use a pipe if eventfd is not supported.
>>>
>>> Signed-off-by: Jan Kiszka<jan.kiszka@siemens.com>
>>> ---
>>>    os-posix.c         |   32 --------------------------------
>>>    oslib-posix.c      |   32 +++++++++++++++++++++++++++++++-
>>>    posix-aio-compat.c |   12 ++++++++----
>>>    3 files changed, 39 insertions(+), 37 deletions(-)
>>>
>>> diff --git a/os-posix.c b/os-posix.c
>>> index dbf3b24..a918895 100644
>>> --- a/os-posix.c
>>> +++ b/os-posix.c
>>> @@ -45,10 +45,6 @@
>>>    #include<sys/syscall.h>
>>>    #endif
>>>
>>> -#ifdef CONFIG_EVENTFD
>>> -#include<sys/eventfd.h>
>>> -#endif
>>> -
>>>    static struct passwd *user_pwd;
>>>    static const char *chroot_dir;
>>>    static int daemonize;
>>> @@ -333,34 +329,6 @@ void os_set_line_buffering(void)
>>>        setvbuf(stdout, NULL, _IOLBF, 0);
>>>    }
>>>
>>> -/*
>>> - * Creates an eventfd that looks like a pipe and has EFD_CLOEXEC set.
>>> - */
>>> -int qemu_eventfd(int fds[2])
>>> -{
>>> -#ifdef CONFIG_EVENTFD
>>> -    int ret;
>>> -
>>> -    ret = eventfd(0, 0);
>>> -    if (ret>= 0) {
>>> -        fds[0] = ret;
>>> -        qemu_set_cloexec(ret);
>>> -        if ((fds[1] = dup(ret)) == -1) {
>>> -            close(ret);
>>> -            return -1;
>>> -        }
>>> -        qemu_set_cloexec(fds[1]);
>>> -        return 0;
>>> -    }
>>> -
>>> -    if (errno != ENOSYS) {
>>> -        return -1;
>>> -    }
>>> -#endif
>>> -
>>> -    return qemu_pipe(fds);
>>> -}
>>> -
>>>    int qemu_create_pidfile(const char *filename)
>>>    {
>>>        char buffer[128];
>>> diff --git a/oslib-posix.c b/oslib-posix.c
>>> index a304fb0..8ef7bd7 100644
>>> --- a/oslib-posix.c
>>> +++ b/oslib-posix.c
>>> @@ -47,7 +47,9 @@ extern int daemon(int, int);
>>>    #include "trace.h"
>>>    #include "qemu_socket.h"
>>>
>>> -
>>> +#ifdef CONFIG_EVENTFD
>>> +#include<sys/eventfd.h>
>>> +#endif
>>>
>>>    int qemu_daemon(int nochdir, int noclose)
>>>    {
>>> @@ -139,6 +141,34 @@ int qemu_pipe(int pipefd[2])
>>>        return ret;
>>>    }
>>>
>>> +/*
>>> + * Creates an eventfd that looks like a pipe and has EFD_CLOEXEC set.
>>> + */
>>> +int qemu_eventfd(int fds[2])
>>> +{
>>> +#ifdef CONFIG_EVENTFD
>>> +    int ret;
>>> +
>>> +    ret = eventfd(0, 0);
>>> +    if (ret>= 0) {
>>> +        fds[0] = ret;
>>> +        qemu_set_cloexec(ret);
>>> +        if ((fds[1] = dup(ret)) == -1) {
>>> +            close(ret);
>>> +            return -1;
>>> +        }
>>> +        qemu_set_cloexec(fds[1]);
>>> +        return 0;
>>> +    }
>>> +
>>> +    if (errno != ENOSYS) {
>>> +        return -1;
>>> +    }
>>> +#endif
>>> +
>>> +    return qemu_pipe(fds);
>>> +}
>>> +
>>
>> I think it's a bit dangerous to implement eventfd() in terms of pipe().
>>
>> You don't expect to handle EAGAIN with eventfd() whereas you have to handle it
>> with pipe().
>
> EAGAIN is returned on eventfd read if no event is pending and the fd is
> non-blocking - just as we configure it.
>
>>
>> Moreover, the eventfd() counter is not lossy (practically speaking) whereas if
>> you use pipe() as a counter, it will be lossy in practice.
>>
>> This is why posix aio uses pipe() and not eventfd().
>
> I don't get this yet. eventfd is lossy by default. It only decreases the
> counter on read if you specify EFD_SEMAPHORE - which we do not do.

uint64_t value;

for (i = 0; i < 1 << 32; i++) {
    value = 1;
    write(fd, &value, sizeof(value));
}

uint64_t count = 0;

do {
    len = read(fd, &value, sizeof(value));
    count += value;
} while (len != -1);

With eventfd, count == 2^32.  With pipe, count == 8192.

That's each '1' is stored in the pipe buffer whereas with eventfd, an index is 
just incremented.

Not sure what you mean re: EFD_SEMAPHORE.  EFD_SEMAPHORE basically means any 
non-zero value is returned as 1 and the counter is decremented by 1.  Without 
EFD_SEMAPHORE, the count is returned and the counter is reset to 0.

Regards,

Anthony Liguori

>
> Jan
>
Jan Kiszka Sept. 27, 2011, 2:36 p.m. UTC | #9
On 2011-09-27 16:34, Avi Kivity wrote:
> On 09/27/2011 05:29 PM, Jan Kiszka wrote:
>>>
>>>  Moreover, the eventfd() counter is not lossy (practically speaking) whereas if
>>>  you use pipe() as a counter, it will be lossy in practice.
>>>
>>>  This is why posix aio uses pipe() and not eventfd().
>>
>> I don't get this yet. eventfd is lossy by default. It only decreases the
>> counter on read if you specify EFD_SEMAPHORE - which we do not do.
>>
> 
> It's not lossy - a read returns the number of events written since the 
> last read.

Yeah, but what's the point? We don't evaluate this.

Jan
Anthony Liguori Sept. 27, 2011, 2:38 p.m. UTC | #10
On 09/27/2011 09:22 AM, Jan Kiszka wrote:
> On 2011-09-27 16:19, Anthony Liguori wrote:
>> On 09/27/2011 09:11 AM, Avi Kivity wrote:
>>> On 09/27/2011 05:07 PM, Anthony Liguori wrote:
>>>>
>>>> You don't expect to handle EAGAIN with eventfd() whereas you have to handle it
>>>> with pipe().
>>>>
>>>> Moreover, the eventfd() counter is not lossy (practically speaking) whereas if
>>>> you use pipe() as a counter, it will be lossy in practice.
>>>>
>>>> This is why posix aio uses pipe() and not eventfd().
>>>
>>> We could define a qemu_event mechanism that satisfies the least common
>>> denominator, and is implemented by eventfd when available.
>>>
>>> qemu_event_create()
>>> qemu_event_signal()
>>> qemu_event_wait()
>>> qemu_event_poll_add() // registers in main loop
>>> qemu_event_poll_del()
>>
>> See hw/event_notifier.[ch].
>
> That code looks suspicious, btw. It claims things ("we use
> EFD_SEMAPHORE") it does not do.

Indeed.

But the interface appears to be the right one IMHO.

Regards,

Anthony Liguori

>
> Jan
>
Paolo Bonzini Sept. 27, 2011, 2:41 p.m. UTC | #11
On 09/27/2011 04:07 PM, Anthony Liguori wrote:
>
> I think it's a bit dangerous to implement eventfd() in terms of pipe().
>
> You don't expect to handle EAGAIN with eventfd() whereas you have to
> handle it with pipe().
>
> Moreover, the eventfd() counter is not lossy (practically speaking)
> whereas if you use pipe() as a counter, it will be lossy in practice.
>
> This is why posix aio uses pipe() and not eventfd().

But this is the same idiom we use for the iothread signaling.  We're not 
using the eventfd's counter.  Perhaps it would be nice to complete 
EventNotifier with "notify event" methods and use it, but Jan's patch is 
safe, I think.

Paolo
Avi Kivity Sept. 27, 2011, 2:42 p.m. UTC | #12
On 09/27/2011 05:36 PM, Jan Kiszka wrote:
> On 2011-09-27 16:34, Avi Kivity wrote:
> >  On 09/27/2011 05:29 PM, Jan Kiszka wrote:
> >>>
> >>>   Moreover, the eventfd() counter is not lossy (practically speaking) whereas if
> >>>   you use pipe() as a counter, it will be lossy in practice.
> >>>
> >>>   This is why posix aio uses pipe() and not eventfd().
> >>
> >>  I don't get this yet. eventfd is lossy by default. It only decreases the
> >>  counter on read if you specify EFD_SEMAPHORE - which we do not do.
> >>
> >
> >  It's not lossy - a read returns the number of events written since the
> >  last read.
>
> Yeah, but what's the point? We don't evaluate this.
>
>

If we write an interface that looks like eventfd but subtly differs, 
someone will get bitten.  If we write a new interface and implement it 
via eventfd (or a pipe), no one gets bitten.
Jan Kiszka Sept. 27, 2011, 2:45 p.m. UTC | #13
On 2011-09-27 16:42, Avi Kivity wrote:
> On 09/27/2011 05:36 PM, Jan Kiszka wrote:
>> On 2011-09-27 16:34, Avi Kivity wrote:
>>>  On 09/27/2011 05:29 PM, Jan Kiszka wrote:
>>>>>
>>>>>   Moreover, the eventfd() counter is not lossy (practically speaking) whereas if
>>>>>   you use pipe() as a counter, it will be lossy in practice.
>>>>>
>>>>>   This is why posix aio uses pipe() and not eventfd().
>>>>
>>>>  I don't get this yet. eventfd is lossy by default. It only decreases the
>>>>  counter on read if you specify EFD_SEMAPHORE - which we do not do.
>>>>
>>>
>>>  It's not lossy - a read returns the number of events written since the
>>>  last read.
>>
>> Yeah, but what's the point? We don't evaluate this.
>>
>>
> 
> If we write an interface that looks like eventfd but subtly differs, 
> someone will get bitten.  If we write a new interface and implement it 
> via eventfd (or a pipe), no one gets bitten.

I don't disagree that there is still room for improving the existing
interface, generalizing to qemu_event. But that's not in the scope of
this patch.

Jan
Avi Kivity Sept. 27, 2011, 2:48 p.m. UTC | #14
On 09/27/2011 05:45 PM, Jan Kiszka wrote:
> I don't disagree that there is still room for improving the existing
> interface, generalizing to qemu_event. But that's not in the scope of
> this patch.
>

Why not use event_notify.c?
Jan Kiszka Sept. 27, 2011, 2:50 p.m. UTC | #15
On 2011-09-27 16:48, Avi Kivity wrote:
> On 09/27/2011 05:45 PM, Jan Kiszka wrote:
>> I don't disagree that there is still room for improving the existing
>> interface, generalizing to qemu_event. But that's not in the scope of
>> this patch.
>>
> 
> Why not use event_notify.c?

It doesn't solve the wrapping issue, it mandates eventfd support.

Jan
Avi Kivity Sept. 27, 2011, 2:54 p.m. UTC | #16
On 09/27/2011 05:50 PM, Jan Kiszka wrote:
> On 2011-09-27 16:48, Avi Kivity wrote:
> >  On 09/27/2011 05:45 PM, Jan Kiszka wrote:
> >>  I don't disagree that there is still room for improving the existing
> >>  interface, generalizing to qemu_event. But that's not in the scope of
> >>  this patch.
> >>
> >
> >  Why not use event_notify.c?
>
> It doesn't solve the wrapping issue, it mandates eventfd support.
>

We can add pipe fallbacks too (though if it was meant to use with vhost, 
that's not what we want).
Anthony Liguori Sept. 27, 2011, 2:57 p.m. UTC | #17
On 09/27/2011 09:54 AM, Avi Kivity wrote:
> On 09/27/2011 05:50 PM, Jan Kiszka wrote:
>> On 2011-09-27 16:48, Avi Kivity wrote:
>> > On 09/27/2011 05:45 PM, Jan Kiszka wrote:
>> >> I don't disagree that there is still room for improving the existing
>> >> interface, generalizing to qemu_event. But that's not in the scope of
>> >> this patch.
>> >>
>> >
>> > Why not use event_notify.c?
>>
>> It doesn't solve the wrapping issue, it mandates eventfd support.
>>
>
> We can add pipe fallbacks too (though if it was meant to use with vhost, that's
> not what we want).

vhost cannot exist on a kernel that doesn't support eventfd so I don't think we 
need to worry about it.

Regards,

Anthony Liguori

>
Jan Kiszka Sept. 27, 2011, 2:59 p.m. UTC | #18
On 2011-09-27 16:54, Avi Kivity wrote:
> On 09/27/2011 05:50 PM, Jan Kiszka wrote:
>> On 2011-09-27 16:48, Avi Kivity wrote:
>>>  On 09/27/2011 05:45 PM, Jan Kiszka wrote:
>>>>  I don't disagree that there is still room for improving the existing
>>>>  interface, generalizing to qemu_event. But that's not in the scope of
>>>>  this patch.
>>>>
>>>
>>>  Why not use event_notify.c?
>>
>> It doesn't solve the wrapping issue, it mandates eventfd support.
>>
> 
> We can add pipe fallbacks too (though if it was meant to use with vhost, 
> that's not what we want).

Not a practical issue due to the dependency on much more recent vhost.

So EventNotifier will have to be migrated over a generic solution as
well. Again, that's food for additional patches.

Jan
diff mbox

Patch

diff --git a/os-posix.c b/os-posix.c
index dbf3b24..a918895 100644
--- a/os-posix.c
+++ b/os-posix.c
@@ -45,10 +45,6 @@ 
 #include <sys/syscall.h>
 #endif
 
-#ifdef CONFIG_EVENTFD
-#include <sys/eventfd.h>
-#endif
-
 static struct passwd *user_pwd;
 static const char *chroot_dir;
 static int daemonize;
@@ -333,34 +329,6 @@  void os_set_line_buffering(void)
     setvbuf(stdout, NULL, _IOLBF, 0);
 }
 
-/*
- * Creates an eventfd that looks like a pipe and has EFD_CLOEXEC set.
- */
-int qemu_eventfd(int fds[2])
-{
-#ifdef CONFIG_EVENTFD
-    int ret;
-
-    ret = eventfd(0, 0);
-    if (ret >= 0) {
-        fds[0] = ret;
-        qemu_set_cloexec(ret);
-        if ((fds[1] = dup(ret)) == -1) {
-            close(ret);
-            return -1;
-        }
-        qemu_set_cloexec(fds[1]);
-        return 0;
-    }
-
-    if (errno != ENOSYS) {
-        return -1;
-    }
-#endif
-
-    return qemu_pipe(fds);
-}
-
 int qemu_create_pidfile(const char *filename)
 {
     char buffer[128];
diff --git a/oslib-posix.c b/oslib-posix.c
index a304fb0..8ef7bd7 100644
--- a/oslib-posix.c
+++ b/oslib-posix.c
@@ -47,7 +47,9 @@  extern int daemon(int, int);
 #include "trace.h"
 #include "qemu_socket.h"
 
-
+#ifdef CONFIG_EVENTFD
+#include <sys/eventfd.h>
+#endif
 
 int qemu_daemon(int nochdir, int noclose)
 {
@@ -139,6 +141,34 @@  int qemu_pipe(int pipefd[2])
     return ret;
 }
 
+/*
+ * Creates an eventfd that looks like a pipe and has EFD_CLOEXEC set.
+ */
+int qemu_eventfd(int fds[2])
+{
+#ifdef CONFIG_EVENTFD
+    int ret;
+
+    ret = eventfd(0, 0);
+    if (ret >= 0) {
+        fds[0] = ret;
+        qemu_set_cloexec(ret);
+        if ((fds[1] = dup(ret)) == -1) {
+            close(ret);
+            return -1;
+        }
+        qemu_set_cloexec(fds[1]);
+        return 0;
+    }
+
+    if (errno != ENOSYS) {
+        return -1;
+    }
+#endif
+
+    return qemu_pipe(fds);
+}
+
 int qemu_utimensat(int dirfd, const char *path, const struct timespec *times,
                    int flags)
 {
diff --git a/posix-aio-compat.c b/posix-aio-compat.c
index d3c1174..2aa5ba3 100644
--- a/posix-aio-compat.c
+++ b/posix-aio-compat.c
@@ -521,7 +521,7 @@  static void posix_aio_read(void *opaque)
     PosixAioState *s = opaque;
     ssize_t len;
 
-    /* read all bytes from signal pipe */
+    /* read all bytes from eventfd or signal pipe */
     for (;;) {
         char bytes[16];
 
@@ -546,10 +546,14 @@  static PosixAioState *posix_aio_state;
 
 static void posix_aio_notify_event(void)
 {
-    char byte = 0;
+    /* Write 8 bytes to be compatible with eventfd.  */
+    static const uint64_t val = 1;
     ssize_t ret;
 
-    ret = write(posix_aio_state->wfd, &byte, sizeof(byte));
+    do {
+        ret = write(posix_aio_state->wfd, &val, sizeof(val));
+    } while (ret < 0 && errno == EINTR);
+
     if (ret < 0 && errno != EAGAIN)
         die("write()");
 }
@@ -665,7 +669,7 @@  int paio_init(void)
     s = g_malloc(sizeof(PosixAioState));
 
     s->first_aio = NULL;
-    if (qemu_pipe(fds) == -1) {
+    if (qemu_eventfd(fds) == -1) {
         fprintf(stderr, "failed to create pipe\n");
         return -1;
     }