diff mbox

[v2] thread-win32: fix GetThreadContext() permanently fails

Message ID 1435148753-4476-1-git-send-email-zavadovsky.yan@gmail.com
State New
Headers show

Commit Message

Zavadovsky Yan June 24, 2015, 12:25 p.m. UTC
Calling SuspendThread() is not enough to suspend a Win32 thread.
We need to call GetThreadContext() after SuspendThread()
to make sure that the OS has really suspended the target thread.
But GetThreadContext() needs the THREAD_GET_CONTEXT
access right on the thread object.
More info about this technique can be found here:
http://blogs.msdn.com/b/oldnewthing/archive/2015/02/05/10591215.aspx

This patch adds THREAD_GET_CONTEXT to the OpenThread() arguments
and changes the odd 'while(GetThreadContext() == SUCCESS)' to
'if(GetThreadContext() == FAILED){exit(1);}'.
So this block of code will continue only after successfully
grabbing the thread context (i.e. when the thread is really suspended),
and otherwise halts with a more verbose error message than before.

Signed-off-by: Zavadovsky Yan <zavadovsky.yan@gmail.com>
---
 cpus.c                   | 14 ++++++++------
 util/qemu-thread-win32.c |  4 ++--
 2 files changed, 10 insertions(+), 8 deletions(-)

Comments

Olga Krishtal June 25, 2015, 9:11 a.m. UTC | #1
On 24/06/15 15:25, Zavadovsky Yan wrote:
> Calling SuspendThread() is not enough to suspend Win32 thread.
> We need to call GetThreadContext() after SuspendThread()
> to make sure that OS have really suspended target thread.
> But GetThreadContext() needs for THREAD_GET_CONTEXT
> access right on thread object.
> More info about this technique can be found here:
> http://blogs.msdn.com/b/oldnewthing/archive/2015/02/05/10591215.aspx
>
> This patch adds THREAD_GET_CONTEXT to OpenThread() arguments
> and change oddity 'while(GetThreadContext() == SUCCESS)' to
> 'if(GetThreadContext() == FAILED){exit(1);}'.
> So this block of code will continue only after successful
> grabbing of thread context(i.e. when thread is really suspended).
> And halts otherwise with more verbose error message than previous.
> Signed-off-by: Zavadovsky Yan <zavadovsky.yan@gmail.com>
> ---
>   cpus.c                   | 14 ++++++++------
>   util/qemu-thread-win32.c |  4 ++--
>   2 files changed, 10 insertions(+), 8 deletions(-)
>
> diff --git a/cpus.c b/cpus.c
> index 4f0e54d..0df6a7d 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -1089,8 +1089,8 @@ static void qemu_cpu_kick_thread(CPUState *cpu)
>           CONTEXT tcgContext;
>   
>           if (SuspendThread(cpu->hThread) == (DWORD)-1) {
> -            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
> -                    GetLastError());
> +            fprintf(stderr, "qemu:%s: SuspendThread GetLastError:%lu\n",
> +                    __func__, GetLastError());
>               exit(1);
>           }
>   
> @@ -1098,15 +1098,17 @@ static void qemu_cpu_kick_thread(CPUState *cpu)
>            * suspended until we can get the context.
>            */
>           tcgContext.ContextFlags = CONTEXT_CONTROL;
> -        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
> -            continue;
I would like to ask you if you have faced this situation in reality?
I have some doubts about changing the while() loop. According to the
article, the thread may not be suspended immediately after the call, due to
a busy scheduler.
If we do exit(1) right after the GetThreadContext(..) check, we can
miss the situation where the scheduler is too busy at the moment, and just
go down. From this point of view, this while is a busy loop that gives the
scheduler the opportunity to do its job. So, I am not sure about it.
> +        if (GetThreadContext(cpu->hThread, &tcgContext) == 0) {
> +            fprintf(stderr, "qemu:%s: GetThreadContext GetLastError:%lu\n",
> +                    __func__, GetLastError());
> +            exit(1);
>           }
>   
>           cpu_signal(0);
>   
>           if (ResumeThread(cpu->hThread) == (DWORD)-1) {
> -            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
> -                    GetLastError());
> +            fprintf(stderr, "qemu:%s: ResumeThread GetLastError:%lu\n",
> +                    __func__, GetLastError());
>               exit(1);
>           }
>       }
> diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
> index 406b52f..823eca1 100644
> --- a/util/qemu-thread-win32.c
> +++ b/util/qemu-thread-win32.c
> @@ -406,8 +406,8 @@ HANDLE qemu_thread_get_handle(QemuThread *thread)
>   
>       EnterCriticalSection(&data->cs);
>       if (!data->exited) {
> -        handle = OpenThread(SYNCHRONIZE | THREAD_SUSPEND_RESUME, FALSE,
> -                            thread->tid);
What was before the usage of this flag? I mean the behavior? As I can 
see it worked even without this flag.
> +        handle = OpenThread(SYNCHRONIZE | THREAD_SUSPEND_RESUME | THREAD_GET_CONTEXT,
> +                            FALSE, thread->tid);
>       } else {
>           handle = NULL;
>       }
Zavadovsky Yan June 25, 2015, 12:17 p.m. UTC | #2
On Thu, Jun 25, 2015 at 12:11 PM, Olga Krishtal <okrishtal@parallels.com>
wrote:

> On 24/06/15 15:25, Zavadovsky Yan wrote:
>
>> Calling SuspendThread() is not enough to suspend Win32 thread.
>> We need to call GetThreadContext() after SuspendThread()
>> to make sure that OS have really suspended target thread.
>> But GetThreadContext() needs for THREAD_GET_CONTEXT
>> access right on thread object.
>> More info about this technique can be found here:
>> http://blogs.msdn.com/b/oldnewthing/archive/2015/02/05/10591215.aspx
>>
>> This patch adds THREAD_GET_CONTEXT to OpenThread() arguments
>> and change oddity 'while(GetThreadContext() == SUCCESS)' to
>> 'if(GetThreadContext() == FAILED){exit(1);}'.
>> So this block of code will continue only after successful
>> grabbing of thread context(i.e. when thread is really suspended).
>> And halts otherwise with more verbose error message than previous.
>> Signed-off-by: Zavadovsky Yan <zavadovsky.yan@gmail.com>
>> ---
>>   cpus.c                   | 14 ++++++++------
>>   util/qemu-thread-win32.c |  4 ++--
>>   2 files changed, 10 insertions(+), 8 deletions(-)
>>
>> diff --git a/cpus.c b/cpus.c
>> index 4f0e54d..0df6a7d 100644
>> --- a/cpus.c
>> +++ b/cpus.c
>> @@ -1089,8 +1089,8 @@ static void qemu_cpu_kick_thread(CPUState *cpu)
>>           CONTEXT tcgContext;
>>             if (SuspendThread(cpu->hThread) == (DWORD)-1) {
>> -            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
>> -                    GetLastError());
>> +            fprintf(stderr, "qemu:%s: SuspendThread GetLastError:%lu\n",
>> +                    __func__, GetLastError());
>>               exit(1);
>>           }
>>   @@ -1098,15 +1098,17 @@ static void qemu_cpu_kick_thread(CPUState *cpu)
>>            * suspended until we can get the context.
>>            */
>>           tcgContext.ContextFlags = CONTEXT_CONTROL;
>> -        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
>> -            continue;
>>
> I would like to ask you if you have faced this situation in reality?
> I have some doubts about changing the while() loop. According to the
> article -
>
SuspendThread is an async operation. But GetThreadContext is not async. And
GetThreadContext forces this pending suspension to complete.

thread may mot be suspended immediately after code, due to the busy
> scheduler.
> If we do exit(1) just right after check of GetThreadContext(..) we can
> miss the situation when
> scheduler is too busy at the moment and just go down.  From this point of
> view this while -
> is busy loop and gives the scheduler the opportunity to do its job.

If GetThreadContext fails in first call it will fail in next consecutive
calls for this thread handle.


> So, I am not sure about it.
>
>> +        if (GetThreadContext(cpu->hThread, &tcgContext) == 0) {
>> +            fprintf(stderr, "qemu:%s: GetThreadContext
>> GetLastError:%lu\n",
>> +                    __func__, GetLastError());
>> +            exit(1);
>>           }
>>             cpu_signal(0);
>>             if (ResumeThread(cpu->hThread) == (DWORD)-1) {
>> -            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
>> -                    GetLastError());
>> +            fprintf(stderr, "qemu:%s: ResumeThread GetLastError:%lu\n",
>> +                    __func__, GetLastError());
>>               exit(1);
>>           }
>>       }
>> diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
>> index 406b52f..823eca1 100644
>> --- a/util/qemu-thread-win32.c
>> +++ b/util/qemu-thread-win32.c
>> @@ -406,8 +406,8 @@ HANDLE qemu_thread_get_handle(QemuThread *thread)
>>         EnterCriticalSection(&data->cs);
>>       if (!data->exited) {
>> -        handle = OpenThread(SYNCHRONIZE | THREAD_SUSPEND_RESUME, FALSE,
>> -                            thread->tid);
>>
> What was before the usage of this flag?

GetThreadContext returns zero, i.e. error. GetLastError says "5" i.e.
"access denied".

I mean the behavior? As I can see it worked even without this flag.

This code works because:
a)GetThreadContext works as Sleep(N) and scheduler have enough time to
finish async suspend.
or
b)GetThreadContext always forces suspension to complete before checking
handle's desired access and doing its own job.
or
c)something else, documentation on SuspendThread is weak.


>
>  +        handle = OpenThread(SYNCHRONIZE | THREAD_SUSPEND_RESUME |
>> THREAD_GET_CONTEXT,
>> +                            FALSE, thread->tid);
>>       } else {
>>           handle = NULL;
>>       }
>>
>
>
Olga Krishtal June 26, 2015, 10:28 a.m. UTC | #3
On 25/06/15 15:17, Zavadovsky Yan wrote:
> On Thu, Jun 25, 2015 at 12:11 PM, Olga Krishtal <okrishtal@parallels.com>
> wrote:
>
>> On 24/06/15 15:25, Zavadovsky Yan wrote:
>>
>>> Calling SuspendThread() is not enough to suspend Win32 thread.
>>> We need to call GetThreadContext() after SuspendThread()
>>> to make sure that OS have really suspended target thread.
>>> But GetThreadContext() needs for THREAD_GET_CONTEXT
>>> access right on thread object.
>>> More info about this technique can be found here:
>>> http://blogs.msdn.com/b/oldnewthing/archive/2015/02/05/10591215.aspx
>>>
>>> This patch adds THREAD_GET_CONTEXT to OpenThread() arguments
>>> and change oddity 'while(GetThreadContext() == SUCCESS)' to
>>> 'if(GetThreadContext() == FAILED){exit(1);}'.
>>> So this block of code will continue only after successful
>>> grabbing of thread context(i.e. when thread is really suspended).
>>> And halts otherwise with more verbose error message than previous.
>>> Signed-off-by: Zavadovsky Yan <zavadovsky.yan@gmail.com>
>>> ---
>>>    cpus.c                   | 14 ++++++++------
>>>    util/qemu-thread-win32.c |  4 ++--
>>>    2 files changed, 10 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/cpus.c b/cpus.c
>>> index 4f0e54d..0df6a7d 100644
>>> --- a/cpus.c
>>> +++ b/cpus.c
>>> @@ -1089,8 +1089,8 @@ static void qemu_cpu_kick_thread(CPUState *cpu)
>>>            CONTEXT tcgContext;
>>>              if (SuspendThread(cpu->hThread) == (DWORD)-1) {
>>> -            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
>>> -                    GetLastError());
>>> +            fprintf(stderr, "qemu:%s: SuspendThread GetLastError:%lu\n",
>>> +                    __func__, GetLastError());
>>>                exit(1);
>>>            }
>>>    @@ -1098,15 +1098,17 @@ static void qemu_cpu_kick_thread(CPUState *cpu)
>>>             * suspended until we can get the context.
>>>             */
>>>            tcgContext.ContextFlags = CONTEXT_CONTROL;
>>> -        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
>>> -            continue;
>>>
>> I would like to ask you if you have faced this situation in reality?
>> I have some doubts about changing the while() loop. According to the
>> article -
>>
> SuspendThread is async operation. But GetContext is not async. And
> GetContext forces this pending suspension to complete.
>
> thread may mot be suspended immediately after code, due to the busy
>> scheduler.
>> If we do exit(1) just right after check of GetThreadContext(..) we can
>> miss the situation when
>> scheduler is too busy at the moment and just go down.  From this point of
>> view this while -
>> is busy loop and gives the scheduler the opportunity to do its job.
> If GetThreadContext fails in first call it will fail in next consecutive
> calls for this thread handle.
AFAIK GetThreadContext can fail if the thread is in the running state, which
means that the thread has not been suspended yet; for example, it is running
on another CPU at a high IRQL.
Instead of waiting, we are going to go down. I agree that the busy loop
should be changed somehow, but not by going down every time.
>
>> So, I am not sure about it.
>>
>>> +        if (GetThreadContext(cpu->hThread, &tcgContext) == 0) {
>>> +            fprintf(stderr, "qemu:%s: GetThreadContext
>>> GetLastError:%lu\n",
>>> +                    __func__, GetLastError());
>>> +            exit(1);
>>>            }
>>>              cpu_signal(0);
>>>              if (ResumeThread(cpu->hThread) == (DWORD)-1) {
>>> -            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
>>> -                    GetLastError());
>>> +            fprintf(stderr, "qemu:%s: ResumeThread GetLastError:%lu\n",
>>> +                    __func__, GetLastError());
>>>                exit(1);
>>>            }
>>>        }
>>> diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
>>> index 406b52f..823eca1 100644
>>> --- a/util/qemu-thread-win32.c
>>> +++ b/util/qemu-thread-win32.c
>>> @@ -406,8 +406,8 @@ HANDLE qemu_thread_get_handle(QemuThread *thread)
>>>          EnterCriticalSection(&data->cs);
>>>        if (!data->exited) {
>>> -        handle = OpenThread(SYNCHRONIZE | THREAD_SUSPEND_RESUME, FALSE,
>>> -                            thread->tid);
>>>
>> What was before the usage of this flag?
> GetThreadContext returns zero, i.e. error. GetLastError says "5" i.e.
> "access denied".
>
> I mean the behavior? As I can see it worked even without this flag.
>
> This code works because:
> a)GetThreadContext works as Sleep(N) and scheduler have enough time to
> finish async suspend.
> or
> b)GetThreadContext always forces suspension to complete before checking
> handle's desired access and doing its own job.
> or
> c)something else, documentation on SuspendThread is weak.
>
>
>>   +        handle = OpenThread(SYNCHRONIZE | THREAD_SUSPEND_RESUME |
>>> THREAD_GET_CONTEXT,
>>> +                            FALSE, thread->tid);
>>>        } else {
>>>            handle = NULL;
>>>        }
>>>
>>
Zavadovsky Yan June 26, 2015, 11:03 a.m. UTC | #4
On Fri, Jun 26, 2015 at 1:28 PM, Olga Krishtal <okrishtal@parallels.com>
wrote:

>  On 25/06/15 15:17, Zavadovsky Yan wrote:
>
> On Thu, Jun 25, 2015 at 12:11 PM, Olga Krishtal <okrishtal@parallels.com> <okrishtal@parallels.com>
> wrote:
>
>
>  On 24/06/15 15:25, Zavadovsky Yan wrote:
>
>
>  Calling SuspendThread() is not enough to suspend Win32 thread.
> We need to call GetThreadContext() after SuspendThread()
> to make sure that OS have really suspended target thread.
> But GetThreadContext() needs for THREAD_GET_CONTEXT
> access right on thread object.
> More info about this technique can be found here:http://blogs.msdn.com/b/oldnewthing/archive/2015/02/05/10591215.aspx
>
> This patch adds THREAD_GET_CONTEXT to OpenThread() arguments
> and change oddity 'while(GetThreadContext() == SUCCESS)' to
> 'if(GetThreadContext() == FAILED){exit(1);}'.
> So this block of code will continue only after successful
> grabbing of thread context(i.e. when thread is really suspended).
> And halts otherwise with more verbose error message than previous.
> Signed-off-by: Zavadovsky Yan <zavadovsky.yan@gmail.com> <zavadovsky.yan@gmail.com>
> ---
>   cpus.c                   | 14 ++++++++------
>   util/qemu-thread-win32.c |  4 ++--
>   2 files changed, 10 insertions(+), 8 deletions(-)
>
> diff --git a/cpus.c b/cpus.c
> index 4f0e54d..0df6a7d 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -1089,8 +1089,8 @@ static void qemu_cpu_kick_thread(CPUState *cpu)
>           CONTEXT tcgContext;
>             if (SuspendThread(cpu->hThread) == (DWORD)-1) {
> -            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
> -                    GetLastError());
> +            fprintf(stderr, "qemu:%s: SuspendThread GetLastError:%lu\n",
> +                    __func__, GetLastError());
>               exit(1);
>           }
>   @@ -1098,15 +1098,17 @@ static void qemu_cpu_kick_thread(CPUState *cpu)
>            * suspended until we can get the context.
>            */
>           tcgContext.ContextFlags = CONTEXT_CONTROL;
> -        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
> -            continue;
>
>
>  I would like to ask you if you have faced this situation in reality?
> I have some doubts about changing the while() loop. According to the
> article -
>
>
>  SuspendThread is async operation. But GetContext is not async. And
> GetContext forces this pending suspension to complete.
>
> thread may mot be suspended immediately after code, due to the busy
>
>  scheduler.
> If we do exit(1) just right after check of GetThreadContext(..) we can
> miss the situation when
> scheduler is too busy at the moment and just go down.  From this point of
> view this while -
> is busy loop and gives the scheduler the opportunity to do its job.
>
>  If GetThreadContext fails in first call it will fail in next consecutive
> calls for this thread handle.
>
>  Afaik GetThreadContext can fail if the thread is in running state, it
> means that
> thread has not been suspended yet, for example it is running on other cpu
> with hight irql.
>
Citation from
http://blogs.msdn.com/b/oldnewthing/archive/2015/02/05/10591215.aspx :
> If you want to make sure the thread really is suspended, you need to
> perform a synchronous operation that is dependent on the fact that the
> thread is suspended.
> The traditional way of doing this is to call GetThreadContext, since
> this requires the kernel to read from the context of the suspended thread,
> which has as a prerequisite that the context be saved in the first place,
> which has as a prerequisite that the thread be suspended.
It's written by MS insider. Qemu maintainers treat him as reliable source -
http://lists.nongnu.org/archive/html/qemu-devel/2015-06/msg05929.html

Also, you can compile the sample from this article and run it. You will get "Huh?
The variable lValue was modified by a suspended thread?" many times.
But if you modify the sample in this manner:

    while (1) {
        memset(&Context, 0, sizeof(Context));
        Context.ContextFlags = CONTEXT_CONTROL;
        if (SuspendThread(thread) == (DWORD)-1) abort();
        if (GetThreadContext(thread, &Context) == 0) {
            printf("GetThreadContext failed\n");
        }
        if (InterlockedOr(&lValue, 0) != InterlockedOr(&lValue, 0)) {
            printf("Huh? The variable lValue was modified by a suspended thread?\n");
        }
        ResumeThread(thread);
    }

then compile and run it, you will see that the "Huh? The variable lValue was
modified by a suspended thread?" message is not shown anymore.


> Instead of waiting we are going to go down. I agree that busy loop should
> be somehow changed,
> but going down every time.
>
>   So, I am not sure about it.
>
>
>  +        if (GetThreadContext(cpu->hThread, &tcgContext) == 0) {
> +            fprintf(stderr, "qemu:%s: GetThreadContext
> GetLastError:%lu\n",
> +                    __func__, GetLastError());
> +            exit(1);
>           }
>             cpu_signal(0);
>             if (ResumeThread(cpu->hThread) == (DWORD)-1) {
> -            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
> -                    GetLastError());
> +            fprintf(stderr, "qemu:%s: ResumeThread GetLastError:%lu\n",
> +                    __func__, GetLastError());
>               exit(1);
>           }
>       }
> diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
> index 406b52f..823eca1 100644
> --- a/util/qemu-thread-win32.c
> +++ b/util/qemu-thread-win32.c
> @@ -406,8 +406,8 @@ HANDLE qemu_thread_get_handle(QemuThread *thread)
>         EnterCriticalSection(&data->cs);
>       if (!data->exited) {
> -        handle = OpenThread(SYNCHRONIZE | THREAD_SUSPEND_RESUME, FALSE,
> -                            thread->tid);
>
>
>  What was before the usage of this flag?
>
>  GetThreadContext returns zero, i.e. error. GetLastError says "5" i.e.
> "access denied".
>
> I mean the behavior? As I can see it worked even without this flag.
>
> This code works because:
> a)GetThreadContext works as Sleep(N) and scheduler have enough time to
> finish async suspend.
> or
> b)GetThreadContext always forces suspension to complete before checking
> handle's desired access and doing its own job.
> or
> c)something else, documentation on SuspendThread is weak.
>
>
>
>   +        handle = OpenThread(SYNCHRONIZE | THREAD_SUSPEND_RESUME |
>
>  THREAD_GET_CONTEXT,
> +                            FALSE, thread->tid);
>       } else {
>           handle = NULL;
>       }
>
>
>
>
Zavadovsky Yan July 1, 2015, 3:48 p.m. UTC | #5
Ping.

Patchwork: http://patchwork.ozlabs.org/patch/488073/

v1 discussion:
http://lists.nongnu.org/archive/html/qemu-devel/2015-06/msg05791.html
and patchworks: http://patchwork.ozlabs.org/patch/487438/
http://patchwork.ozlabs.org/patch/487566/

On Wed, Jun 24, 2015 at 3:25 PM, Zavadovsky Yan <zavadovsky.yan@gmail.com>
wrote:

> Calling SuspendThread() is not enough to suspend Win32 thread.
> We need to call GetThreadContext() after SuspendThread()
> to make sure that OS have really suspended target thread.
> But GetThreadContext() needs for THREAD_GET_CONTEXT
> access right on thread object.
> More info about this technique can be found here:
> http://blogs.msdn.com/b/oldnewthing/archive/2015/02/05/10591215.aspx
>
> This patch adds THREAD_GET_CONTEXT to OpenThread() arguments
> and change oddity 'while(GetThreadContext() == SUCCESS)' to
> 'if(GetThreadContext() == FAILED){exit(1);}'.
> So this block of code will continue only after successful
> grabbing of thread context(i.e. when thread is really suspended).
> And halts otherwise with more verbose error message than previous.
>
> Signed-off-by: Zavadovsky Yan <zavadovsky.yan@gmail.com>
> ---
>  cpus.c                   | 14 ++++++++------
>  util/qemu-thread-win32.c |  4 ++--
>  2 files changed, 10 insertions(+), 8 deletions(-)
>
> diff --git a/cpus.c b/cpus.c
> index 4f0e54d..0df6a7d 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -1089,8 +1089,8 @@ static void qemu_cpu_kick_thread(CPUState *cpu)
>          CONTEXT tcgContext;
>
>          if (SuspendThread(cpu->hThread) == (DWORD)-1) {
> -            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
> -                    GetLastError());
> +            fprintf(stderr, "qemu:%s: SuspendThread GetLastError:%lu\n",
> +                    __func__, GetLastError());
>              exit(1);
>          }
>
> @@ -1098,15 +1098,17 @@ static void qemu_cpu_kick_thread(CPUState *cpu)
>           * suspended until we can get the context.
>           */
>          tcgContext.ContextFlags = CONTEXT_CONTROL;
> -        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
> -            continue;
> +        if (GetThreadContext(cpu->hThread, &tcgContext) == 0) {
> +            fprintf(stderr, "qemu:%s: GetThreadContext
> GetLastError:%lu\n",
> +                    __func__, GetLastError());
> +            exit(1);
>          }
>
>          cpu_signal(0);
>
>          if (ResumeThread(cpu->hThread) == (DWORD)-1) {
> -            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
> -                    GetLastError());
> +            fprintf(stderr, "qemu:%s: ResumeThread GetLastError:%lu\n",
> +                    __func__, GetLastError());
>              exit(1);
>          }
>      }
> diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
> index 406b52f..823eca1 100644
> --- a/util/qemu-thread-win32.c
> +++ b/util/qemu-thread-win32.c
> @@ -406,8 +406,8 @@ HANDLE qemu_thread_get_handle(QemuThread *thread)
>
>      EnterCriticalSection(&data->cs);
>      if (!data->exited) {
> -        handle = OpenThread(SYNCHRONIZE | THREAD_SUSPEND_RESUME, FALSE,
> -                            thread->tid);
> +        handle = OpenThread(SYNCHRONIZE | THREAD_SUSPEND_RESUME |
> THREAD_GET_CONTEXT,
> +                            FALSE, thread->tid);
>      } else {
>          handle = NULL;
>      }
> --
> 2.4.4.windows.2
>
>
Paolo Bonzini July 1, 2015, 4:49 p.m. UTC | #6
On 01/07/2015 17:48, Zavadovsky Yan wrote:
> Ping.

Stefan, are you merging this?

Paolo

> Patchwork: http://patchwork.ozlabs.org/patch/488073/
> 
> v1
> discussion: http://lists.nongnu.org/archive/html/qemu-devel/2015-06/msg05791.html
> and patchworks: http://patchwork.ozlabs.org/patch/487438/
> http://patchwork.ozlabs.org/patch/487566/
> 
> On Wed, Jun 24, 2015 at 3:25 PM, Zavadovsky Yan
> <zavadovsky.yan@gmail.com <mailto:zavadovsky.yan@gmail.com>> wrote:
> 
>     Calling SuspendThread() is not enough to suspend Win32 thread.
>     We need to call GetThreadContext() after SuspendThread()
>     to make sure that OS have really suspended target thread.
>     But GetThreadContext() needs for THREAD_GET_CONTEXT
>     access right on thread object.
>     More info about this technique can be found here:
>     http://blogs.msdn.com/b/oldnewthing/archive/2015/02/05/10591215.aspx
> 
>     This patch adds THREAD_GET_CONTEXT to OpenThread() arguments
>     and change oddity 'while(GetThreadContext() == SUCCESS)' to
>     'if(GetThreadContext() == FAILED){exit(1);}'.
>     So this block of code will continue only after successful
>     grabbing of thread context(i.e. when thread is really suspended).
>     And halts otherwise with more verbose error message than previous.
> 
>     Signed-off-by: Zavadovsky Yan <zavadovsky.yan@gmail.com
>     <mailto:zavadovsky.yan@gmail.com>>
>     ---
>      cpus.c                   | 14 ++++++++------
>      util/qemu-thread-win32.c |  4 ++--
>      2 files changed, 10 insertions(+), 8 deletions(-)
> 
>     diff --git a/cpus.c b/cpus.c
>     index 4f0e54d..0df6a7d 100644
>     --- a/cpus.c
>     +++ b/cpus.c
>     @@ -1089,8 +1089,8 @@ static void qemu_cpu_kick_thread(CPUState *cpu)
>              CONTEXT tcgContext;
> 
>              if (SuspendThread(cpu->hThread) == (DWORD)-1) {
>     -            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
>     -                    GetLastError());
>     +            fprintf(stderr, "qemu:%s: SuspendThread
>     GetLastError:%lu\n",
>     +                    __func__, GetLastError());
>                  exit(1);
>              }
> 
>     @@ -1098,15 +1098,17 @@ static void qemu_cpu_kick_thread(CPUState *cpu)
>               * suspended until we can get the context.
>               */
>              tcgContext.ContextFlags = CONTEXT_CONTROL;
>     -        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
>     -            continue;
>     +        if (GetThreadContext(cpu->hThread, &tcgContext) == 0) {
>     +            fprintf(stderr, "qemu:%s: GetThreadContext
>     GetLastError:%lu\n",
>     +                    __func__, GetLastError());
>     +            exit(1);
>              }
> 
>              cpu_signal(0);
> 
>              if (ResumeThread(cpu->hThread) == (DWORD)-1) {
>     -            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
>     -                    GetLastError());
>     +            fprintf(stderr, "qemu:%s: ResumeThread GetLastError:%lu\n",
>     +                    __func__, GetLastError());
>                  exit(1);
>              }
>          }
>     diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
>     index 406b52f..823eca1 100644
>     --- a/util/qemu-thread-win32.c
>     +++ b/util/qemu-thread-win32.c
>     @@ -406,8 +406,8 @@ HANDLE qemu_thread_get_handle(QemuThread *thread)
> 
>          EnterCriticalSection(&data->cs);
>          if (!data->exited) {
>     -        handle = OpenThread(SYNCHRONIZE | THREAD_SUSPEND_RESUME, FALSE,
>     -                            thread->tid);
>     +        handle = OpenThread(SYNCHRONIZE | THREAD_SUSPEND_RESUME |
>     THREAD_GET_CONTEXT,
>     +                            FALSE, thread->tid);
>          } else {
>              handle = NULL;
>          }
>     --
>     2.4.4.windows.2
> 
>
Liviu Ionescu July 1, 2015, 5 p.m. UTC | #7
sorry for being partly off-topic, but the last time I checked the windows builds, there was a problem on win64, applications using timers (like Cortex-M SysTick) failed. 

the tests were performed both with my fork and with Stefan's official version, with the same results.

I saw there were some win patches recently, but I do not know if they addressed this issue too.

it would be great if someone with more win64 experience could investigate and possibly fix this.


regards,

Liviu
Stefan Weil July 1, 2015, 6 p.m. UTC | #8
Am 01.07.2015 um 18:49 schrieb Paolo Bonzini:
>
> On 01/07/2015 17:48, Zavadovsky Yan wrote:
>> Ping.
> Stefan, are you merging this?
>
> Paolo

I can do so, but as the current code seems to fix the problems
with multi-processor systems, too (even if it is unclear why),
it does not look urgent.

Fabien, you suggested "extensive tests". Do you think that
patch v2 is fine, or are you still waiting for test results?

Stefan
Fabien Chouteau July 2, 2015, 4:52 p.m. UTC | #9
On 07/01/2015 08:00 PM, Stefan Weil wrote:
> Am 01.07.2015 um 18:49 schrieb Paolo Bonzini:
>>
>> On 01/07/2015 17:48, Zavadovsky Yan wrote:
>>> Ping.
>> Stefan, are you merging this?
>>
>> Paolo
> 
> I can do so, but as the current code seems to fix the problems
> with multi-processor systems, too (even if it is unclear why),
> it does not look urgent.
> 
> Fabien, you suggested "extensive tests". Do you think that
> patch v2 is fine, or are you still waiting for test results?
> 

The patch looks good. I won't be able to do heavy testing anytime soon,
if Yan tells us that the new code was tested I will take his word for it.

Thanks,
Zavadovsky Yan July 2, 2015, 7:09 p.m. UTC | #10
I tested this patch on my 4-core CPU.
Both debug and release builds.
Both Win32 and Win64 binaries. (I used an old Fedora 17-18 with SJLJ mingw-w64
to cross-build for Win64.)
With the default QEMU BIOS and with a self-built OVMF (also debug and
release) from EDK2.

Also I did some synthetic tests with sample from this article:
http://blogs.msdn.com/b/oldnewthing/archive/2015/02/05/10591215.aspx
modified as I described here:
http://lists.nongnu.org/archive/html/qemu-devel/2015-06/msg06894.html

On Thu, Jul 2, 2015 at 7:52 PM, Fabien Chouteau <chouteau@adacore.com>
wrote:

> On 07/01/2015 08:00 PM, Stefan Weil wrote:
> > Am 01.07.2015 um 18:49 schrieb Paolo Bonzini:
> >>
> >> On 01/07/2015 17:48, Zavadovsky Yan wrote:
> >>> Ping.
> >> Stefan, are you merging this?
> >>
> >> Paolo
> >
> > I can do so, but as the current code seems to fix the problems
> > with multi-processor systems, too (even if it is unclear why),
> > it does not look urgent.
> >
> > Fabien, you suggested "extensive tests". Do you think that
> > patch v2 is fine, or are you still waiting for test results?
> >
>
> The patch looks good. I won't be able to do heavy testing anytime soon,
> if Yan tells us that the new code was tested I will take his word for it.
>
> Thanks,
>
>
Fabien Chouteau July 6, 2015, 9:29 a.m. UTC | #11
On 07/02/2015 09:09 PM, Zavadovsky Yan wrote:
> I tested this patch on my 4-cores cpu.
> Debug and release builds both.
> Win32 and Win64 binaries both. (I used old Fedora 17-18 with SJLJ mingw-w64 to crossbuild for Win64.)
> With default Qemu BIOS and with myself-builded OVMF(also debug and release) from EDK2.
> 
> Also I did some synthetic tests with sample from this article:
> http://blogs.msdn.com/b/oldnewthing/archive/2015/02/05/10591215.aspx
> modified as I described here:
> http://lists.nongnu.org/archive/html/qemu-devel/2015-06/msg06894.html
> 

That's good for me.

Thanks Yan,
Zavadovsky Yan July 14, 2015, 7:44 p.m. UTC | #12
On Wed, Jul 1, 2015 at 9:00 PM, Stefan Weil <sw@weilnetz.de> wrote:
>
> Fabien, you suggested "extensive tests". Do you think that
> patch v2 is fine, or are you still waiting for test results?


On Mon, Jul 6, 2015 at 12:29 PM, Fabien Chouteau <chouteau@adacore.com>
wrote:

> That's good for me.
>

Ping...
http://patchwork.ozlabs.org/patch/488073/
Stefan Weil July 14, 2015, 8:29 p.m. UTC | #13
Am 14.07.2015 um 21:44 schrieb Zavadovsky Yan:
> On Wed, Jul 1, 2015 at 9:00 PM, Stefan Weil <sw@weilnetz.de 
> <mailto:sw@weilnetz.de>> wrote:
>
>     Fabien, you suggested "extensive tests". Do you think that
>     patch v2 is fine, or are you still waiting for test results?
>
> On Mon, Jul 6, 2015 at 12:29 PM, Fabien Chouteau <chouteau@adacore.com 
> <mailto:chouteau@adacore.com>> wrote:
>
>     That's good for me.
>
>
> Ping...
> http://patchwork.ozlabs.org/patch/488073/


I'll send a pull request for this patch after the release of QEMU 2.4.
Or do you think that the patch is necessary for 2.4?

Regards
Stefan
Zavadovsky Yan July 14, 2015, 8:32 p.m. UTC | #14
On Tue, Jul 14, 2015 at 11:29 PM, Stefan Weil <sw@weilnetz.de> wrote:

>  I'll send a pull request for this patch after the release of QEMU 2.4.
>
Ok. Thanks.
Stefan Weil Sept. 10, 2015, 8:06 p.m. UTC | #15
Am 14.07.2015 um 22:32 schrieb Zavadovsky Yan:
> On Tue, Jul 14, 2015 at 11:29 PM, Stefan Weil <sw@weilnetz.de> wrote:
> 
>>  I'll send a pull request for this patch after the release of QEMU 2.4.
>>
> Ok. Thanks.
> 

Thank you for this patch. It is now in my patch queue
(git://qemu.weilnetz.de/qemu.git wxx).

Stefan
diff mbox

Patch

diff --git a/cpus.c b/cpus.c
index 4f0e54d..0df6a7d 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1089,8 +1089,8 @@  static void qemu_cpu_kick_thread(CPUState *cpu)
         CONTEXT tcgContext;
 
         if (SuspendThread(cpu->hThread) == (DWORD)-1) {
-            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
-                    GetLastError());
+            fprintf(stderr, "qemu:%s: SuspendThread GetLastError:%lu\n",
+                    __func__, GetLastError());
             exit(1);
         }
 
@@ -1098,15 +1098,17 @@  static void qemu_cpu_kick_thread(CPUState *cpu)
          * suspended until we can get the context.
          */
         tcgContext.ContextFlags = CONTEXT_CONTROL;
-        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
-            continue;
+        if (GetThreadContext(cpu->hThread, &tcgContext) == 0) {
+            fprintf(stderr, "qemu:%s: GetThreadContext GetLastError:%lu\n",
+                    __func__, GetLastError());
+            exit(1);
         }
 
         cpu_signal(0);
 
         if (ResumeThread(cpu->hThread) == (DWORD)-1) {
-            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
-                    GetLastError());
+            fprintf(stderr, "qemu:%s: ResumeThread GetLastError:%lu\n",
+                    __func__, GetLastError());
             exit(1);
         }
     }
diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
index 406b52f..823eca1 100644
--- a/util/qemu-thread-win32.c
+++ b/util/qemu-thread-win32.c
@@ -406,8 +406,8 @@  HANDLE qemu_thread_get_handle(QemuThread *thread)
 
     EnterCriticalSection(&data->cs);
     if (!data->exited) {
-        handle = OpenThread(SYNCHRONIZE | THREAD_SUSPEND_RESUME, FALSE,
-                            thread->tid);
+        handle = OpenThread(SYNCHRONIZE | THREAD_SUSPEND_RESUME | THREAD_GET_CONTEXT,
+                            FALSE, thread->tid);
     } else {
         handle = NULL;
     }