diff mbox

timer: fix qemu_poll_ns early timeout on windows

Message ID 1397551292-1010-1-git-send-email-s.vorobiov@samsung.com
State New
Headers show

Commit Message

Stanislav Vorobiov April 15, 2014, 8:41 a.m. UTC
From: Sangho Park <sangho1206.park@samsung.com>

g_poll has a problem on windows when using
timeouts < 10ms, in glib/gpoll.c:

/* If not, and we have a significant timeout, poll again with
 * timeout then. Note that this will return indication for only
 * one event, or only for messages. We ignore timeouts less than
 * ten milliseconds as they are mostly pointless on Windows, the
 * MsgWaitForMultipleObjectsEx() call will timeout right away
 * anyway.
 */
if (retval == 0 && (timeout == INFINITE || timeout >= 10))
  retval = poll_rest (poll_msgs, handles, nhandles, fds, nfds, timeout);

so whenever g_poll is called with timeout < 10ms it does
a quick poll instead of wait, this causes significant performance
degradation of qemu, thus we should use WaitForMultipleObjectsEx
directly

Signed-off-by: Stanislav Vorobiov <s.vorobiov@samsung.com>
---
 qemu-timer.c |   91 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 91 insertions(+)

Comments

Stanislav Vorobiov April 17, 2014, 9:07 a.m. UTC | #1
Hi, everyone

Any comments on this one ? This patch fixes
pretty serious performance issues on windows, it would be
great to have this in 2.0.0

On 04/15/2014 12:41 PM, Stanislav Vorobiov wrote:
> From: Sangho Park <sangho1206.park@samsung.com>
> 
> g_poll has a problem on windows when using
> timeouts < 10ms, in glib/gpoll.c:
> 
> /* If not, and we have a significant timeout, poll again with
>  * timeout then. Note that this will return indication for only
>  * one event, or only for messages. We ignore timeouts less than
>  * ten milliseconds as they are mostly pointless on Windows, the
>  * MsgWaitForMultipleObjectsEx() call will timeout right away
>  * anyway.
>  */
> if (retval == 0 && (timeout == INFINITE || timeout >= 10))
>   retval = poll_rest (poll_msgs, handles, nhandles, fds, nfds, timeout);
> 
> so whenever g_poll is called with timeout < 10ms it does
> a quick poll instead of wait, this causes significant performance
> degradation of qemu, thus we should use WaitForMultipleObjectsEx
> directly
> 
> Signed-off-by: Stanislav Vorobiov <s.vorobiov@samsung.com>
> ---
>  qemu-timer.c |   91 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 91 insertions(+)
> 
> diff --git a/qemu-timer.c b/qemu-timer.c
> index e15ce47..9fb92cb 100644
> --- a/qemu-timer.c
> +++ b/qemu-timer.c
> @@ -315,6 +315,97 @@ int qemu_poll_ns(GPollFD *fds, guint nfds, int64_t timeout)
>          ts.tv_nsec = timeout % 1000000000LL;
>          return ppoll((struct pollfd *)fds, nfds, &ts, NULL);
>      }
> +#elif defined(_WIN32)
> +    guint i;
> +    HANDLE handles[MAXIMUM_WAIT_OBJECTS];
> +    gint nhandles = 0;
> +    int num_completed = 0;
> +    gint timeout_ms = qemu_timeout_ns_to_ms(timeout);
> +
> +    for (i = 0; i < nfds; i++) {
> +        gint j;
> +
> +        if (fds[i].fd <= 0) {
> +            continue;
> +        }
> +
> +        /* don't add same handle several times
> +         */
> +        for (j = 0; j < nhandles; j++) {
> +            if (handles[j] == (HANDLE)fds[i].fd) {
> +                break;
> +            }
> +        }
> +
> +        if (j == nhandles) {
> +            if (nhandles == MAXIMUM_WAIT_OBJECTS) {
> +                fprintf(stderr, "Too many handles to wait for!\n");
> +                break;
> +            } else {
> +                handles[nhandles++] = (HANDLE)fds[i].fd;
> +            }
> +        }
> +    }
> +
> +    for (i = 0; i < nfds; ++i) {
> +        fds[i].revents = 0;
> +    }
> +
> +    if (timeout_ms == -1) {
> +        timeout_ms = INFINITE;
> +    }
> +
> +    if (nhandles == 0) {
> +        if (timeout_ms == INFINITE) {
> +            return -1;
> +        } else {
> +            SleepEx(timeout_ms, TRUE);
> +            return 0;
> +        }
> +    }
> +
> +    while (1) {
> +        DWORD res;
> +        gint j;
> +
> +        res = WaitForMultipleObjectsEx(nhandles, handles, FALSE,
> +            timeout_ms, TRUE);
> +
> +        if (res == WAIT_FAILED) {
> +            for (i = 0; i < nfds; ++i) {
> +                fds[i].revents = 0;
> +            }
> +
> +            return -1;
> +        } else if ((res == WAIT_TIMEOUT) || (res == WAIT_IO_COMPLETION) ||
> +                   ((int)res < WAIT_OBJECT_0) ||
> +                   (res >= (WAIT_OBJECT_0 + nhandles))) {
> +            break;
> +        }
> +
> +        for (i = 0; i < nfds; ++i) {
> +            if (handles[res - WAIT_OBJECT_0] == (HANDLE)fds[i].fd) {
> +                fds[i].revents = fds[i].events;
> +            }
> +        }
> +
> +        ++num_completed;
> +
> +        if (nhandles <= 1) {
> +            break;
> +        }
> +
> +        /* poll the rest of the handles
> +         */
> +        for (j = res - WAIT_OBJECT_0 + 1; j < nhandles; j++) {
> +            handles[j - 1] = handles[j];
> +        }
> +        --nhandles;
> +
> +        timeout_ms = 0;
> +    }
> +
> +    return num_completed;
>  #else
>      return g_poll(fds, nfds, qemu_timeout_ns_to_ms(timeout));
>  #endif
>
Sangho Park April 18, 2014, 2:11 a.m. UTC | #2
Hi, maintainers. 
Could you check this patch?
http://www.mail-archive.com/qemu-devel@nongnu.org/msg227161.html

Thanks

ps)
We've checked the http://wiki.qemu.org/Contribute/SubmitAPatch. Yet, if we
made any violation or mistake, let us know. We will appreciate your favor.

-----Original Message-----
From: Stanislav Vorobiov [mailto:s.vorobiov@samsung.com] 
Sent: Thursday, April 17, 2014 6:08 PM
To: qemu-devel@nongnu.org
Cc: syeon.hwang@samsung.com; sangho1206.park@samsung.com
Subject: Re: [Qemu-devel] [PATCH] timer: fix qemu_poll_ns early timeout on
windows

Hi, everyone

Any comments on this one ? This patch fixes pretty serious performance
issues on windows, it would be great to have this in 2.0.0

On 04/15/2014 12:41 PM, Stanislav Vorobiov wrote:
> From: Sangho Park <sangho1206.park@samsung.com>
> 
> g_poll has a problem on windows when using timeouts < 10ms, in 
> glib/gpoll.c:
> 
> /* If not, and we have a significant timeout, poll again with
>  * timeout then. Note that this will return indication for only
>  * one event, or only for messages. We ignore timeouts less than
>  * ten milliseconds as they are mostly pointless on Windows, the
>  * MsgWaitForMultipleObjectsEx() call will timeout right away
>  * anyway.
>  */
> if (retval == 0 && (timeout == INFINITE || timeout >= 10))
>   retval = poll_rest (poll_msgs, handles, nhandles, fds, nfds, 
> timeout);
> 
> so whenever g_poll is called with timeout < 10ms it does a quick poll 
> instead of wait, this causes significant performance degradation of 
> qemu, thus we should use WaitForMultipleObjectsEx directly
> 
> Signed-off-by: Stanislav Vorobiov <s.vorobiov@samsung.com>
> ---
>  qemu-timer.c |   91
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 91 insertions(+)
> 
> diff --git a/qemu-timer.c b/qemu-timer.c index e15ce47..9fb92cb 100644
> --- a/qemu-timer.c
> +++ b/qemu-timer.c
> @@ -315,6 +315,97 @@ int qemu_poll_ns(GPollFD *fds, guint nfds, int64_t
timeout)
>          ts.tv_nsec = timeout % 1000000000LL;
>          return ppoll((struct pollfd *)fds, nfds, &ts, NULL);
>      }
> +#elif defined(_WIN32)
> +    guint i;
> +    HANDLE handles[MAXIMUM_WAIT_OBJECTS];
> +    gint nhandles = 0;
> +    int num_completed = 0;
> +    gint timeout_ms = qemu_timeout_ns_to_ms(timeout);
> +
> +    for (i = 0; i < nfds; i++) {
> +        gint j;
> +
> +        if (fds[i].fd <= 0) {
> +            continue;
> +        }
> +
> +        /* don't add same handle several times
> +         */
> +        for (j = 0; j < nhandles; j++) {
> +            if (handles[j] == (HANDLE)fds[i].fd) {
> +                break;
> +            }
> +        }
> +
> +        if (j == nhandles) {
> +            if (nhandles == MAXIMUM_WAIT_OBJECTS) {
> +                fprintf(stderr, "Too many handles to wait for!\n");
> +                break;
> +            } else {
> +                handles[nhandles++] = (HANDLE)fds[i].fd;
> +            }
> +        }
> +    }
> +
> +    for (i = 0; i < nfds; ++i) {
> +        fds[i].revents = 0;
> +    }
> +
> +    if (timeout_ms == -1) {
> +        timeout_ms = INFINITE;
> +    }
> +
> +    if (nhandles == 0) {
> +        if (timeout_ms == INFINITE) {
> +            return -1;
> +        } else {
> +            SleepEx(timeout_ms, TRUE);
> +            return 0;
> +        }
> +    }
> +
> +    while (1) {
> +        DWORD res;
> +        gint j;
> +
> +        res = WaitForMultipleObjectsEx(nhandles, handles, FALSE,
> +            timeout_ms, TRUE);
> +
> +        if (res == WAIT_FAILED) {
> +            for (i = 0; i < nfds; ++i) {
> +                fds[i].revents = 0;
> +            }
> +
> +            return -1;
> +        } else if ((res == WAIT_TIMEOUT) || (res == WAIT_IO_COMPLETION)
||
> +                   ((int)res < WAIT_OBJECT_0) ||
> +                   (res >= (WAIT_OBJECT_0 + nhandles))) {
> +            break;
> +        }
> +
> +        for (i = 0; i < nfds; ++i) {
> +            if (handles[res - WAIT_OBJECT_0] == (HANDLE)fds[i].fd) {
> +                fds[i].revents = fds[i].events;
> +            }
> +        }
> +
> +        ++num_completed;
> +
> +        if (nhandles <= 1) {
> +            break;
> +        }
> +
> +        /* poll the rest of the handles
> +         */
> +        for (j = res - WAIT_OBJECT_0 + 1; j < nhandles; j++) {
> +            handles[j - 1] = handles[j];
> +        }
> +        --nhandles;
> +
> +        timeout_ms = 0;
> +    }
> +
> +    return num_completed;
>  #else
>      return g_poll(fds, nfds, qemu_timeout_ns_to_ms(timeout));  #endif
>
Stefan Weil April 18, 2014, 6:46 a.m. UTC | #3
Hi,

sorry, your patch was too late for QEMU 2.0. It remained unnoticed for
two reasons:

* Patches for some special version should show this in the subject line:
  [PATCH for 2.0] instead of [PATCH]

* CC'ing the maintainers helps a lot, as you see now :-)

More comments below.


Am 18.04.2014 04:11, schrieb Sangho Park:
> Hi, maintainers. 
> Could you check this patch?
> http://www.mail-archive.com/qemu-devel@nongnu.org/msg227161.html
> 
> Thanks
> 
> ps)
> We've checked the http://wiki.qemu.org/Contribute/SubmitAPatch. Yet, if we
> made any violation or mistake, let us know. We will appreciate your favor.
> 
> -----Original Message-----
> From: Stanislav Vorobiov [mailto:s.vorobiov@samsung.com] 
> Sent: Thursday, April 17, 2014 6:08 PM
> To: qemu-devel@nongnu.org
> Cc: syeon.hwang@samsung.com; sangho1206.park@samsung.com
> Subject: Re: [Qemu-devel] [PATCH] timer: fix qemu_poll_ns early timeout on
> windows
> 
> Hi, everyone
> 
> Any comments on this one ? This patch fixes pretty serious performance
> issues on windows, it would be great to have this in 2.0.0
> 
> On 04/15/2014 12:41 PM, Stanislav Vorobiov wrote:
>> From: Sangho Park <sangho1206.park@samsung.com>
>>
>> g_poll has a problem on windows when using timeouts < 10ms, in 
>> glib/gpoll.c:
>>
>> /* If not, and we have a significant timeout, poll again with
>>  * timeout then. Note that this will return indication for only
>>  * one event, or only for messages. We ignore timeouts less than
>>  * ten milliseconds as they are mostly pointless on Windows, the
>>  * MsgWaitForMultipleObjectsEx() call will timeout right away
>>  * anyway.
>>  */
>> if (retval == 0 && (timeout == INFINITE || timeout >= 10))
>>   retval = poll_rest (poll_msgs, handles, nhandles, fds, nfds, 
>> timeout);
>>
>> so whenever g_poll is called with timeout < 10ms it does a quick poll 
>> instead of wait, this causes significant performance degradation of 
>> qemu, thus we should use WaitForMultipleObjectsEx directly

Can you quantify this performance degradation and specify your test
scenario? Did you find the source of those small timeouts? Which timeout
values are used there?

Would it be sufficient to round any timeout > 0 and < 10 to 10 for
Windows hosts? Maybe this could be done in qemu_timeout_ns_to_ms. If
this does not work, we still can use g_poll for timeout >= 10 and call a
new Windows specific polling function for timeout < 10.

>>
>> Signed-off-by: Stanislav Vorobiov <s.vorobiov@samsung.com>
>> ---
>>  qemu-timer.c |   91
> ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 91 insertions(+)
>>
>> diff --git a/qemu-timer.c b/qemu-timer.c index e15ce47..9fb92cb 100644
>> --- a/qemu-timer.c
>> +++ b/qemu-timer.c
>> @@ -315,6 +315,97 @@ int qemu_poll_ns(GPollFD *fds, guint nfds, int64_t
> timeout)
>>          ts.tv_nsec = timeout % 1000000000LL;
>>          return ppoll((struct pollfd *)fds, nfds, &ts, NULL);
>>      }
>> +#elif defined(_WIN32)
>> +    guint i;
>> +    HANDLE handles[MAXIMUM_WAIT_OBJECTS];
>> +    gint nhandles = 0;
>> +    int num_completed = 0;
>> +    gint timeout_ms = qemu_timeout_ns_to_ms(timeout);
>> +
>> +    for (i = 0; i < nfds; i++) {
>> +        gint j;
>> +
>> +        if (fds[i].fd <= 0) {
>> +            continue;
>> +        }
>> +
>> +        /* don't add same handle several times
>> +         */
>> +        for (j = 0; j < nhandles; j++) {
>> +            if (handles[j] == (HANDLE)fds[i].fd) {
>> +                break;
>> +            }
>> +        }
>> +
>> +        if (j == nhandles) {
>> +            if (nhandles == MAXIMUM_WAIT_OBJECTS) {
>> +                fprintf(stderr, "Too many handles to wait for!\n");
>> +                break;
>> +            } else {
>> +                handles[nhandles++] = (HANDLE)fds[i].fd;
>> +            }
>> +        }
>> +    }
>> +
>> +    for (i = 0; i < nfds; ++i) {
>> +        fds[i].revents = 0;
>> +    }
>> +
>> +    if (timeout_ms == -1) {
>> +        timeout_ms = INFINITE;
>> +    }
>> +
>> +    if (nhandles == 0) {
>> +        if (timeout_ms == INFINITE) {
>> +            return -1;
>> +        } else {
>> +            SleepEx(timeout_ms, TRUE);
>> +            return 0;
>> +        }
>> +    }
>> +
>> +    while (1) {
>> +        DWORD res;
>> +        gint j;
>> +
>> +        res = WaitForMultipleObjectsEx(nhandles, handles, FALSE,
>> +            timeout_ms, TRUE);
>> +
>> +        if (res == WAIT_FAILED) {
>> +            for (i = 0; i < nfds; ++i) {
>> +                fds[i].revents = 0;
>> +            }
>> +
>> +            return -1;
>> +        } else if ((res == WAIT_TIMEOUT) || (res == WAIT_IO_COMPLETION)
> ||
>> +                   ((int)res < WAIT_OBJECT_0) ||
>> +                   (res >= (WAIT_OBJECT_0 + nhandles))) {
>> +            break;
>> +        }
>> +
>> +        for (i = 0; i < nfds; ++i) {
>> +            if (handles[res - WAIT_OBJECT_0] == (HANDLE)fds[i].fd) {
>> +                fds[i].revents = fds[i].events;
>> +            }
>> +        }
>> +
>> +        ++num_completed;
>> +
>> +        if (nhandles <= 1) {
>> +            break;
>> +        }
>> +
>> +        /* poll the rest of the handles
>> +         */
>> +        for (j = res - WAIT_OBJECT_0 + 1; j < nhandles; j++) {
>> +            handles[j - 1] = handles[j];
>> +        }
>> +        --nhandles;
>> +
>> +        timeout_ms = 0;
>> +    }
>> +
>> +    return num_completed;
>>  #else
>>      return g_poll(fds, nfds, qemu_timeout_ns_to_ms(timeout));  #endif
>>
> 

That code adds a new dependency on windows.h for qemu-timer.c. If we
really need it, I'd prefer an implementation in util/oslib-win32.c.

Regards
Stefan
Stanislav Vorobiov April 18, 2014, 7:34 a.m. UTC | #4
Hi,

Please see below

On 04/18/2014 10:46 AM, Stefan Weil wrote:
> Hi,
> 
> sorry, your patch was too late for QEMU 2.0. It remained unnoticed for
> two reasons:
> 
> * Patches for some special version should show this in the subject line:
>   [PATCH for 2.0] instead of [PATCH]
> 
> * CC'ing the maintainers helps a lot, as you see now :-)
> 
> More comments below.
> 
> 
> Am 18.04.2014 04:11, schrieb Sangho Park:
>> Hi, maintainers. 
>> Could you check this patch?
>> http://www.mail-archive.com/qemu-devel@nongnu.org/msg227161.html
>>
>> Thanks
>>
>> ps)
>> We've checked the http://wiki.qemu.org/Contribute/SubmitAPatch. Yet, if we
>> made any violation or mistake, let us know. We will appreciate your favor.
>>
>> -----Original Message-----
>> From: Stanislav Vorobiov [mailto:s.vorobiov@samsung.com] 
>> Sent: Thursday, April 17, 2014 6:08 PM
>> To: qemu-devel@nongnu.org
>> Cc: syeon.hwang@samsung.com; sangho1206.park@samsung.com
>> Subject: Re: [Qemu-devel] [PATCH] timer: fix qemu_poll_ns early timeout on
>> windows
>>
>> Hi, everyone
>>
>> Any comments on this one ? This patch fixes pretty serious performance
>> issues on windows, it would be great to have this in 2.0.0
>>
>> On 04/15/2014 12:41 PM, Stanislav Vorobiov wrote:
>>> From: Sangho Park <sangho1206.park@samsung.com>
>>>
>>> g_poll has a problem on windows when using timeouts < 10ms, in 
>>> glib/gpoll.c:
>>>
>>> /* If not, and we have a significant timeout, poll again with
>>>  * timeout then. Note that this will return indication for only
>>>  * one event, or only for messages. We ignore timeouts less than
>>>  * ten milliseconds as they are mostly pointless on Windows, the
>>>  * MsgWaitForMultipleObjectsEx() call will timeout right away
>>>  * anyway.
>>>  */
>>> if (retval == 0 && (timeout == INFINITE || timeout >= 10))
>>>   retval = poll_rest (poll_msgs, handles, nhandles, fds, nfds, 
>>> timeout);
>>>
>>> so whenever g_poll is called with timeout < 10ms it does a quick poll 
>>> instead of wait, this causes significant performance degradation of 
>>> qemu, thus we should use WaitForMultipleObjectsEx directly
> 
> Can you quantify this performance degradation and specify your test
> scenario? Did you find the source of those small timeouts? Which timeout
> values are used there?
We don't have a separate test scenario, we've noticed that performance degradation
on tizen emulator after merging with QEMU 1.7. It turned out that it
was caused by this patch series:

e93379b039030c68d85693a4bee2b76f814108d2 aio / timers: Rename qemu_timer_* functions
...
91c68f143d6e707c5783b162292dce38ae358c19 aio / timers: remove dummy_io_handler_flush from tests/test-aio.c

The important part is main-loop.c:main_loop_wait which was:

    ret = os_host_main_loop_wait(timeout);

and became:

    if (timeout == UINT32_MAX) {
        timeout_ns = -1;
    } else {
        timeout_ns = (uint64_t)timeout * (int64_t)(SCALE_MS);
    }

    timeout_ns = qemu_soonest_timeout(timeout_ns,
                                      timerlistgroup_deadline_ns(
                                          &main_loop_tlg));

    ret = os_host_main_loop_wait(timeout_ns);

os_host_main_loop_wait then does:

    g_poll_ret = qemu_poll_ns(poll_fds, n_poll_fds + w->num, poll_timeout_ns);

So, the change made main_loop_wait take timer timeouts into account, thus,
qemu_poll_ns started to get called with values < 10ms and this caused problems on
windows since those waits simply turned into polls due to g_poll logic. This is
in fact g_poll's problem, timeout values passed to g_poll mean "at least", i.e.
g_poll MUST wait AT LEAST this time, it's allowed to wait more, but it's not allowed
to wait less. IMHO this should be reported to glib as a bug, but it'll be nice to fix
this in QEMU as well since it'll take too much time to get this into glib (if possible at all).

Do we need to make some separate test to demonstrate this performance degradation ?

> 
> Would it be sufficient to round any timeout > 0 and < 10 to 10 for
> Windows hosts? Maybe this could be done in qemu_timeout_ns_to_ms.
We tried that, it gets almost as good as with this patch, but this
makes timeouts like, say, 2ms wait for 10ms, so with this patch it's
still better.

> If
> this does not work, we still can use g_poll for timeout >= 10 and call a
> new Windows specific polling function for timeout < 10.
Does it have a point to separate things, if we've implemented this for timeouts < 10ms
why not use it for timeouts >= 10ms ? The code in this patch is in fact g_poll's
implementation without that "timeout >= 10" hack and it handles timeouts >= 10ms
as well as g_poll does.

> 
>>>
>>> Signed-off-by: Stanislav Vorobiov <s.vorobiov@samsung.com>
>>> ---
>>>  qemu-timer.c |   91
>> ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>  1 file changed, 91 insertions(+)
>>>
>>> diff --git a/qemu-timer.c b/qemu-timer.c index e15ce47..9fb92cb 100644
>>> --- a/qemu-timer.c
>>> +++ b/qemu-timer.c
>>> @@ -315,6 +315,97 @@ int qemu_poll_ns(GPollFD *fds, guint nfds, int64_t
>> timeout)
>>>          ts.tv_nsec = timeout % 1000000000LL;
>>>          return ppoll((struct pollfd *)fds, nfds, &ts, NULL);
>>>      }
>>> +#elif defined(_WIN32)
>>> +    guint i;
>>> +    HANDLE handles[MAXIMUM_WAIT_OBJECTS];
>>> +    gint nhandles = 0;
>>> +    int num_completed = 0;
>>> +    gint timeout_ms = qemu_timeout_ns_to_ms(timeout);
>>> +
>>> +    for (i = 0; i < nfds; i++) {
>>> +        gint j;
>>> +
>>> +        if (fds[i].fd <= 0) {
>>> +            continue;
>>> +        }
>>> +
>>> +        /* don't add same handle several times
>>> +         */
>>> +        for (j = 0; j < nhandles; j++) {
>>> +            if (handles[j] == (HANDLE)fds[i].fd) {
>>> +                break;
>>> +            }
>>> +        }
>>> +
>>> +        if (j == nhandles) {
>>> +            if (nhandles == MAXIMUM_WAIT_OBJECTS) {
>>> +                fprintf(stderr, "Too many handles to wait for!\n");
>>> +                break;
>>> +            } else {
>>> +                handles[nhandles++] = (HANDLE)fds[i].fd;
>>> +            }
>>> +        }
>>> +    }
>>> +
>>> +    for (i = 0; i < nfds; ++i) {
>>> +        fds[i].revents = 0;
>>> +    }
>>> +
>>> +    if (timeout_ms == -1) {
>>> +        timeout_ms = INFINITE;
>>> +    }
>>> +
>>> +    if (nhandles == 0) {
>>> +        if (timeout_ms == INFINITE) {
>>> +            return -1;
>>> +        } else {
>>> +            SleepEx(timeout_ms, TRUE);
>>> +            return 0;
>>> +        }
>>> +    }
>>> +
>>> +    while (1) {
>>> +        DWORD res;
>>> +        gint j;
>>> +
>>> +        res = WaitForMultipleObjectsEx(nhandles, handles, FALSE,
>>> +            timeout_ms, TRUE);
>>> +
>>> +        if (res == WAIT_FAILED) {
>>> +            for (i = 0; i < nfds; ++i) {
>>> +                fds[i].revents = 0;
>>> +            }
>>> +
>>> +            return -1;
>>> +        } else if ((res == WAIT_TIMEOUT) || (res == WAIT_IO_COMPLETION)
>> ||
>>> +                   ((int)res < WAIT_OBJECT_0) ||
>>> +                   (res >= (WAIT_OBJECT_0 + nhandles))) {
>>> +            break;
>>> +        }
>>> +
>>> +        for (i = 0; i < nfds; ++i) {
>>> +            if (handles[res - WAIT_OBJECT_0] == (HANDLE)fds[i].fd) {
>>> +                fds[i].revents = fds[i].events;
>>> +            }
>>> +        }
>>> +
>>> +        ++num_completed;
>>> +
>>> +        if (nhandles <= 1) {
>>> +            break;
>>> +        }
>>> +
>>> +        /* poll the rest of the handles
>>> +         */
>>> +        for (j = res - WAIT_OBJECT_0 + 1; j < nhandles; j++) {
>>> +            handles[j - 1] = handles[j];
>>> +        }
>>> +        --nhandles;
>>> +
>>> +        timeout_ms = 0;
>>> +    }
>>> +
>>> +    return num_completed;
>>>  #else
>>>      return g_poll(fds, nfds, qemu_timeout_ns_to_ms(timeout));  #endif
>>>
>>
> 
> That code adds a new dependency on windows.h for qemu-timer.c. If we
> really need it, I'd prefer an implementation in util/oslib-win32.c.
m.b. it makes sense to move entire qemu_poll_ns to oslib then ? like
this patch to oslib-win32.c and the rest of the stuff to oslib-posix.c ?

> 
> Regards
> Stefan
> 
>
Stefan Weil April 18, 2014, 8:03 a.m. UTC | #5
Am 18.04.2014 09:34, schrieb Stanislav Vorobiov:
> Hi,
> 
> Please see below
> 
> On 04/18/2014 10:46 AM, Stefan Weil wrote:
>> Hi,
[...]
> 
>>
>> Would it be sufficient to round any timeout > 0 and < 10 to 10 for
>> Windows hosts? Maybe this could be done in qemu_timeout_ns_to_ms.
> We tried that, it gets almost as good as with this patch, but this
> makes timeouts like, say, 2ms wait for 10ms, so with this patch it's
> still better.
> 
>> If
>> this does not work, we still can use g_poll for timeout >= 10 and call a
>> new Windows specific polling function for timeout < 10.
> Does it have a point to separate things, if we've implemented this for timeouts < 10ms
> why not use it for timeouts >= 10ms ? The code in this patch is in fact g_poll's
> implementation without that "timeout >= 10" hack and it handles timeouts >= 10ms
> as well as g_poll does.


Did you send a bug report to the maintainers of glib? I think it would
be good to do so.

We can override the buggy implementation of g_poll for Windows by
redirecting any call of g_poll to a new g_poll_fixed function.

In file include/glib-compat.h:

#if defined(_WIN32)
/* g_poll does not handle timeout < 10 ms correctly, so use wrapper. */
#define g_poll(fds, nfds, timeout) g_poll_fixed((fds, nfds, timeout)
gint g_poll_fixed(GPollFD *fds, guint nfds, gint timeout);
#endif

Then move your code into a new function g_poll_fixed() in file
util/oslib-win32.c (and add there some comments, too).

There is also some g_poll code in include/qemu-common.h. This should
also be moved to include/glib-compat.h and must be excluded for _WIN32
when you add g_poll_fixed.

Stefan
Alex Bligh April 18, 2014, 8:29 a.m. UTC | #6
On 18 Apr 2014, at 03:11, Sangho Park wrote:

>> 
>> g_poll has a problem on windows when using timeouts < 10ms, in 
>> glib/gpoll.c:
>> 
>> /* If not, and we have a significant timeout, poll again with
>> * timeout then. Note that this will return indication for only
>> * one event, or only for messages. We ignore timeouts less than
>> * ten milliseconds as they are mostly pointless on Windows, the
>> * MsgWaitForMultipleObjectsEx() call will timeout right away
>> * anyway.
>> */
>> if (retval == 0 && (timeout == INFINITE || timeout >= 10))
>>  retval = poll_rest (poll_msgs, handles, nhandles, fds, nfds, 
>> timeout);
>> 
>> so whenever g_poll is called with timeout < 10ms it does a quick poll 
>> instead of wait, this causes significant performance degradation of 
>> qemu, thus we should use WaitForMultipleObjectsEx directly

As the author of the original changes, I don't know enough about Windows
to know whether this change is sensible, but will happily admit my
Windows coding was done completely blind (as I said at the time)
and required more third party testing, so it would be unsurprising
if there were issues.

On the general issue, the problem you will face if g_poll waits LESS
than the amount of time is that you'll get a busy wait for a while.
For instance, if a timeout is scheduled 9ms ahead, and g_poll (or
replacement) waits 0ms, then the system will continuously call g_poll,
check whether a timer has expired, call g_poll, check whether a timer
has expired and so on. During this period, no actual work will get
done.

Therefore, if the underlying poll has lower resolution than 1 ns (and
does not wait for, at a minimum) the value specified, then it should
round up.

On Windows (or more accurately in the absence of ppoll), qemu uses gpoll
and converts the timeout using qemu_timeout_ns_to_ms. What this does
is returns 0 for a non-blocking timeout of 0, but otherwise rounds
UP to the nearest millisecond. Code pasted below:

int qemu_timeout_ns_to_ms(int64_t ns)
{
    int64_t ms;
    if (ns < 0) {
        return -1;
    }

    if (!ns) {
        return 0;
    }

    /* Always round up, because it's better to wait too long than to wait too
     * little and effectively busy-wait
     */
    ms = (ns + SCALE_MS - 1) / SCALE_MS;

    /* To avoid overflow problems, limit this to 2^31, i.e. approx 25 days */
    if (ms > (int64_t) INT32_MAX) {
        ms = INT32_MAX;
    }

    return (int) ms;
}


I suspect the easiest fix (certainly for debug purposes) would be to make all
polls for non-zero time at least 10ms long. IE after the assignment of 'ms',
do something like

#ifdef BROKEN_POLL
    if (ms < 10) {
        ms = 10;
#endif
    }

If you're actually saying g_poll has 10 millisecond resolution, or rounds down,
then the appropriate course of action would be to change the rounding line to

    ms = (ns + 10*SCALE_MS - 1) / (10*SCALE_MS);

where g_poll is broken like this.

As I say, I have no Windows box to test this on, but I hope that's helpful.
Stanislav Vorobiov April 18, 2014, 9:26 a.m. UTC | #7
Hi,

Please see below

On 04/18/2014 12:29 PM, Alex Bligh wrote:
> 
> On 18 Apr 2014, at 03:11, Sangho Park wrote:
> 
>>>
>>> g_poll has a problem on windows when using timeouts < 10ms, in 
>>> glib/gpoll.c:
>>>
>>> /* If not, and we have a significant timeout, poll again with
>>> * timeout then. Note that this will return indication for only
>>> * one event, or only for messages. We ignore timeouts less than
>>> * ten milliseconds as they are mostly pointless on Windows, the
>>> * MsgWaitForMultipleObjectsEx() call will timeout right away
>>> * anyway.
>>> */
>>> if (retval == 0 && (timeout == INFINITE || timeout >= 10))
>>>  retval = poll_rest (poll_msgs, handles, nhandles, fds, nfds, 
>>> timeout);
>>>
>>> so whenever g_poll is called with timeout < 10ms it does a quick poll 
>>> instead of wait, this causes significant performance degradation of 
>>> qemu, thus we should use WaitForMultipleObjectsEx directly
> 
> As the author of the original changes, I don't know enough about Windows
> to know whether this change is sensible, but will happily admit my
> Windows coding was done completely blind (as I said at the time)
> and required more third party testing, so it would be unsurprising
> if there were issues.
> 
> On the general issue, the problem you will face if g_poll waits LESS
> than the amount of time is that you'll get a busy wait for a while.
> For instance, if a timeout is scheduled 9ms ahead, and g_poll (or
> replacement) waits 0ms, then the system will continuously call g_poll,
> check whether a timer has expired, call g_poll, check whether a timer
> has expired and so on. During this period, no actual work will get
> done.
Yes, this is exactly what we've experienced, CPU was hogged and overall
performance of guest dropped.

> 
> Therefore, if the underlying poll has lower resolution than 1 ns (and
> does not wait for, at a minimum) the value specified, then it should
> round up.
> 
> On Windows (or more accurately in the absence of ppoll), qemu uses gpoll
> and converts the timeout using qemu_timeout_ns_to_ms. What this does
> is returns 0 for a non-blocking timeout of 0, but otherwise rounds
> UP to the nearest millisecond. Code pasted below:
> 
> int qemu_timeout_ns_to_ms(int64_t ns)
> {
>     int64_t ms;
>     if (ns < 0) {
>         return -1;
>     }
> 
>     if (!ns) {
>         return 0;
>     }
> 
>     /* Always round up, because it's better to wait too long than to wait too
>      * little and effectively busy-wait
>      */
>     ms = (ns + SCALE_MS - 1) / SCALE_MS;
> 
>     /* To avoid overflow problems, limit this to 2^31, i.e. approx 25 days */
>     if (ms > (int64_t) INT32_MAX) {
>         ms = INT32_MAX;
>     }
> 
>     return (int) ms;
> }
> 
> 
> I suspect the easiest fix (certainly for debug purposes) would be to make all
> polls for non-zero time at least 10ms long. IE after the assignment of 'ms',
> do something like
> 
> #ifdef BROKEN_POLL
>     if (ms < 10) {
>         ms = 10;
> #endif
>     }
> 
> If you're actually saying g_poll has 10 millisecond resolution, or rounds down,
> then the appropriate course of action would be to change the rounding line to
> 
>     ms = (ns + 10*SCALE_MS - 1) / (10*SCALE_MS);
> 
> where g_poll is broken like this.
> 
> As I say, I have no Windows box to test this on, but I hope that's helpful.
Yes, it's possible to work around like this, but if we look at this:

     if (ms < 10) {
         ms = 10;
     }

the question arises: where did 10 come from ? It looks like a magic number and in fact
it is, it was taken from glib's gpoll.c. But what if tomorrow glib changes and use, say,
15 in gpoll.c, then we'll observe the same CPU hogging in qemu again.

>
Stanislav Vorobiov April 18, 2014, 9:28 a.m. UTC | #8
Hi, see below

On 04/18/2014 12:03 PM, Stefan Weil wrote:
> Am 18.04.2014 09:34, schrieb Stanislav Vorobiov:
>> Hi,
>>
>> Please see below
>>
>> On 04/18/2014 10:46 AM, Stefan Weil wrote:
>>> Hi,
> [...]
>>
>>>
>>> Would it be sufficient to round any timeout > 0 and < 10 to 10 for
>>> Windows hosts? Maybe this could be done in qemu_timeout_ns_to_ms.
>> We tried that, it gets almost as good as with this patch, but this
>> makes timeouts like, say, 2ms wait for 10ms, so with this patch it's
>> still better.
>>
>>> If
>>> this does not work, we still can use g_poll for timeout >= 10 and call a
>>> new Windows specific polling function for timeout < 10.
>> Does it have a point to separate things, if we've implemented this for timeouts < 10ms
>> why not use it for timeouts >= 10ms ? The code in this patch is in fact g_poll's
>> implementation without that "timeout >= 10" hack and it handles timeouts >= 10ms
>> as well as g_poll does.
> 
> 
> Did you send a bug report to the maintainers of glib? I think it would
> be good to do so.
I've filed a bug report here - https://bugzilla.gnome.org/show_bug.cgi?id=728486

> 
> We can override the buggy implementation of g_poll for Windows by
> redirecting any call of g_poll to a new g_poll_fixed function.
> 
> In file include/glib-compat.h:
> 
> #if defined(_WIN32)
> /* g_poll does not handle timeout < 10 ms correctly, so use wrapper. */
> #define g_poll(fds, nfds, timeout) g_poll_fixed((fds, nfds, timeout)
> gint g_poll_fixed(GPollFD *fds, guint nfds, gint timeout);
> #endif
> 
> Then move your code into a new function g_poll_fixed() in file
> util/oslib-win32.c (and add there some comments, too).
> 
> There is also some g_poll code in include/qemu-common.h. This should
> also be moved to include/glib-compat.h and must be excluded for _WIN32
> when you add g_poll_fixed.
> 
> Stefan
> 
>
Alex Bligh April 18, 2014, 9:46 a.m. UTC | #9
On 18 Apr 2014, at 10:26, Stanislav Vorobiov wrote:

> Yes, it's possible to work around like this, but if we look at this:
> 
>     if (ms < 10) {
>         ms = 10;
>     }
> 
> the question arises: where did 10 come from ? It looks like a magic number and in fact
> it is, it was taken from glib's gpoll.c. But what if tomorrow glib changes and use, say,
> 15 in gpoll.c, then we'll observe the same CPU hogging in qemu again.

Well obviously the real fix is either to fix the bug in glib, or to do what I did
when I introduced ppoll, which was to conclude that the glib implementation of
gpoll was inadequate and that it should be replaced locally if possible with something
with microsecond, if not nanosecond, resolution (obviously nanosecond resolution itself
never occurs in practice). Perhaps that's what you are trying to do and I have
misunderstood.
Paolo Bonzini April 28, 2014, 9:10 a.m. UTC | #10
Il 18/04/2014 10:29, Alex Bligh ha scritto:
> I suspect the easiest fix (certainly for debug purposes) would be to make all
> polls for non-zero time at least 10ms long. IE after the assignment of 'ms',
> do something like
>
> #ifdef BROKEN_POLL
>     if (ms < 10) {
>         ms = 10;
> #endif
>     }
>
> If you're actually saying g_poll has 10 millisecond resolution, or rounds down,
> then the appropriate course of action would be to change the rounding line to
>
>     ms = (ns + 10*SCALE_MS - 1) / (10*SCALE_MS);
>
> where g_poll is broken like this.
>
> As I say, I have no Windows box to test this on, but I hope that's helpful.

Unfortunately that would probably break guests.

Windows's default poll resolution is indeed around 10 ms, and that's 
where glib comes from.  But it can be lowered with timeBeginPeriod and 
timeEndPeriod, which is what QEMU does (note: the functions are 
basically nops with WINE, but if you remove them things actually break 
on Windows).

But it's unlikely that glib will change their code, which also makes 
sense in the common case where you don't call 
timeBeginPeriod/timeEndPeriod.  So I think Stanislav's patch makes sense.

Paolo
diff mbox

Patch

diff --git a/qemu-timer.c b/qemu-timer.c
index e15ce47..9fb92cb 100644
--- a/qemu-timer.c
+++ b/qemu-timer.c
@@ -315,6 +315,97 @@  int qemu_poll_ns(GPollFD *fds, guint nfds, int64_t timeout)
         ts.tv_nsec = timeout % 1000000000LL;
         return ppoll((struct pollfd *)fds, nfds, &ts, NULL);
     }
+#elif defined(_WIN32)
+    guint i;
+    HANDLE handles[MAXIMUM_WAIT_OBJECTS];
+    gint nhandles = 0;
+    int num_completed = 0;
+    gint timeout_ms = qemu_timeout_ns_to_ms(timeout);
+
+    for (i = 0; i < nfds; i++) {
+        gint j;
+
+        if (fds[i].fd <= 0) {
+            continue;
+        }
+
+        /* don't add same handle several times
+         */
+        for (j = 0; j < nhandles; j++) {
+            if (handles[j] == (HANDLE)fds[i].fd) {
+                break;
+            }
+        }
+
+        if (j == nhandles) {
+            if (nhandles == MAXIMUM_WAIT_OBJECTS) {
+                fprintf(stderr, "Too many handles to wait for!\n");
+                break;
+            } else {
+                handles[nhandles++] = (HANDLE)fds[i].fd;
+            }
+        }
+    }
+
+    for (i = 0; i < nfds; ++i) {
+        fds[i].revents = 0;
+    }
+
+    if (timeout_ms == -1) {
+        timeout_ms = INFINITE;
+    }
+
+    if (nhandles == 0) {
+        if (timeout_ms == INFINITE) {
+            return -1;
+        } else {
+            SleepEx(timeout_ms, TRUE);
+            return 0;
+        }
+    }
+
+    while (1) {
+        DWORD res;
+        gint j;
+
+        res = WaitForMultipleObjectsEx(nhandles, handles, FALSE,
+            timeout_ms, TRUE);
+
+        if (res == WAIT_FAILED) {
+            for (i = 0; i < nfds; ++i) {
+                fds[i].revents = 0;
+            }
+
+            return -1;
+        } else if ((res == WAIT_TIMEOUT) || (res == WAIT_IO_COMPLETION) ||
+                   ((int)res < WAIT_OBJECT_0) ||
+                   (res >= (WAIT_OBJECT_0 + nhandles))) {
+            break;
+        }
+
+        for (i = 0; i < nfds; ++i) {
+            if (handles[res - WAIT_OBJECT_0] == (HANDLE)fds[i].fd) {
+                fds[i].revents = fds[i].events;
+            }
+        }
+
+        ++num_completed;
+
+        if (nhandles <= 1) {
+            break;
+        }
+
+        /* poll the rest of the handles
+         */
+        for (j = res - WAIT_OBJECT_0 + 1; j < nhandles; j++) {
+            handles[j - 1] = handles[j];
+        }
+        --nhandles;
+
+        timeout_ms = 0;
+    }
+
+    return num_completed;
 #else
     return g_poll(fds, nfds, qemu_timeout_ns_to_ms(timeout));
 #endif