
mttcg: Set jmp_env to handle exit from tb_gen_code

Message ID 87k28ko1qw.fsf@gmail.com
State New

Commit Message

Pranith Kumar Feb. 20, 2017, 11:56 p.m. UTC
Alex Bennée writes:

> Pranith Kumar <bobby.prani@gmail.com> writes:
>
>> tb_gen_code() can exit execution using cpu_loop_exit() when it cannot
>> allocate new tb's. To handle this, we need to properly set the jmp_env
>> pointer ahead of calling tb_gen_code().
>>
>> CC: Alex Bennée <alex.bennee@linaro.org>
>> CC: Richard Henderson <rth@twiddle.net>
>> Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
>> ---
>>  cpu-exec.c | 23 +++++++++++------------
>>  1 file changed, 11 insertions(+), 12 deletions(-)
>>
>> diff --git a/cpu-exec.c b/cpu-exec.c
>> index 97d79612d9..4b70988b24 100644
>> --- a/cpu-exec.c
>> +++ b/cpu-exec.c
>> @@ -236,23 +236,22 @@ static void cpu_exec_step(CPUState *cpu)
>>
>>      cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
>>      tb_lock();
>> -    tb = tb_gen_code(cpu, pc, cs_base, flags,
>> -                     1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
>> -    tb->orig_tb = NULL;
>> -    tb_unlock();
>> -
>> -    cc->cpu_exec_enter(cpu);
>> -
>
> It occurs to me we are also diverging in our locking pattern from
> tb_find which takes mmap_lock first. This is a NOP for system emulation
> but needed for user-emulation (for which we can do cpu_exec_step but not
> cpu_exec_nocache).

Right. So we have to take the mmap_lock() before calling
tb_gen_code(). However, this lock is released in the error path before calling
cpu_loop_exit() if allocation of a new tb fails. The following is what I have
after merging with the previous EXCP_ATOMIC handling patch.
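
(The merged diff is quoted in full in the replies below; its core is a
sigsetjmp() guard around the translate/execute/free sequence. A minimal
sketch of the pattern, using the names from cpu-exec.c:)

    /* Arm cpu->jmp_env so that the cpu_loop_exit() taken on
     * tb_gen_code()'s allocation failure lands back here instead of
     * in a stale jump buffer left over from cpu_exec(). */
    tb_lock();
    if (sigsetjmp(cpu->jmp_env, 0) == 0) {
        mmap_lock();
        tb = tb_gen_code(cpu, pc, cs_base, flags,
                         1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
        tb->orig_tb = NULL;
        mmap_unlock();
        tb_unlock();

        cpu_tb_exec(cpu, tb);        /* execute the generated code */

        tb_lock();
        tb_phys_invalidate(tb, -1);
        tb_free(tb);
    }
    /* Reached directly or via siglongjmp(); tb_gen_code()'s error
     * path has already dropped mmap_lock, as noted above. */
    tb_unlock();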



--
Pranith

Comments

Alex Bennée Feb. 21, 2017, 12:35 a.m. UTC | #1
Pranith Kumar <bobby.prani@gmail.com> writes:

> Alex Bennée writes:
>
>> Pranith Kumar <bobby.prani@gmail.com> writes:
>>
>>> tb_gen_code() can exit execution using cpu_loop_exit() when it cannot
>>> allocate new tb's. To handle this, we need to properly set the jmp_env
>>> pointer ahead of calling tb_gen_code().
>>>
>>> CC: Alex Bennée <alex.bennee@linaro.org>
>>> CC: Richard Henderson <rth@twiddle.net>
>>> Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
>>> ---
>>>  cpu-exec.c | 23 +++++++++++------------
>>>  1 file changed, 11 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/cpu-exec.c b/cpu-exec.c
>>> index 97d79612d9..4b70988b24 100644
>>> --- a/cpu-exec.c
>>> +++ b/cpu-exec.c
>>> @@ -236,23 +236,22 @@ static void cpu_exec_step(CPUState *cpu)
>>>
>>>      cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
>>>      tb_lock();
>>> -    tb = tb_gen_code(cpu, pc, cs_base, flags,
>>> -                     1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
>>> -    tb->orig_tb = NULL;
>>> -    tb_unlock();
>>> -
>>> -    cc->cpu_exec_enter(cpu);
>>> -
>>
>> It occurs to me we are also diverging in our locking pattern from
>> tb_find which takes mmap_lock first. This is a NOP for system emulation
>> but needed for user-emulation (for which we can do cpu_exec_step but not
>> cpu_exec_nocache).
>
> Right. So we have to take the mmap_lock() before calling
> tb_gen_code(). However, this lock is released in the error path before calling
> cpu_loop_exit() if allocation of a new tb fails. The following is what I have
> after merging with the previous EXCP_ATOMIC handling patch.

Hmm, we are inside a start/end_exclusive() section though, so what could we
be racing against that also wants the mmap_lock()? Could it be held by
anything at this point, given every other vCPU had to be kicked out of its
execution loop first?
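
(For context: start_exclusive() in cpus-common.c does not return until
every other vCPU has left its execution loop. A simplified sketch of the
protocol, eliding memory barriers and waiter bookkeeping, not the
verbatim implementation:)

    void start_exclusive(void)
    {
        CPUState *other_cpu;

        qemu_mutex_lock(&qemu_cpu_list_lock);
        exclusive_idle();              /* wait out any prior exclusive section */
        atomic_set(&pending_cpus, 1);  /* flag the vCPUs to stop */
        CPU_FOREACH(other_cpu) {
            if (atomic_read(&other_cpu->running)) {
                atomic_inc(&pending_cpus);
                qemu_cpu_kick(other_cpu);  /* force it out of generated code */
            }
        }
        while (atomic_read(&pending_cpus) > 1) {
            qemu_cond_wait(&exclusive_cond, &qemu_cpu_list_lock);
        }
        /* Until end_exclusive(), no other vCPU is executing. */
        qemu_mutex_unlock(&qemu_cpu_list_lock);
    }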

>
> diff --git a/cpu-exec.c b/cpu-exec.c
> index a8e04bffbf..2bb3ba3672 100644
> --- a/cpu-exec.c
> +++ b/cpu-exec.c
> @@ -228,6 +228,7 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
>
>  static void cpu_exec_step(CPUState *cpu)
>  {
> +    CPUClass *cc = CPU_GET_CLASS(cpu);
>      CPUArchState *env = (CPUArchState *)cpu->env_ptr;
>      TranslationBlock *tb;
>      target_ulong cs_base, pc;
> @@ -235,16 +236,24 @@ static void cpu_exec_step(CPUState *cpu)
>
>      cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
>      tb_lock();
> -    tb = tb_gen_code(cpu, pc, cs_base, flags,
> -                     1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
> -    tb->orig_tb = NULL;
> -    tb_unlock();
> -    /* execute the generated code */
> -    trace_exec_tb_nocache(tb, pc);
> -    cpu_tb_exec(cpu, tb);
> -    tb_lock();
> -    tb_phys_invalidate(tb, -1);
> -    tb_free(tb);
> +    if (sigsetjmp(cpu->jmp_env, 0) == 0) {
> +        mmap_lock();
> +        tb = tb_gen_code(cpu, pc, cs_base, flags,
> +                         1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
> +        tb->orig_tb = NULL;
> +        mmap_unlock();
> +        tb_unlock();
> +
> +        cc->cpu_exec_enter(cpu);
> +        /* execute the generated code */
> +        trace_exec_tb_nocache(tb, pc);
> +        cpu_tb_exec(cpu, tb);
> +        cc->cpu_exec_exit(cpu);
> +
> +        tb_lock();
> +        tb_phys_invalidate(tb, -1);
> +        tb_free(tb);
> +    }
>      tb_unlock();
>  }
>
> diff --git a/cpus.c b/cpus.c
> index 77bba08f9a..b39408b4b1 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -1347,6 +1347,11 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
>                  if (r == EXCP_DEBUG) {
>                      cpu_handle_guest_debug(cpu);
>                      break;
> +                } else if (r == EXCP_ATOMIC) {
> +                    qemu_mutex_unlock_iothread();
> +                    cpu_exec_step_atomic(cpu);
> +                    qemu_mutex_lock_iothread();
> +                    break;
>                  }
>              } else if (cpu->stop) {
>                  if (cpu->unplug) {
> @@ -1457,6 +1462,10 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
>                   */
>                  g_assert(cpu->halted);
>                  break;
> +            case EXCP_ATOMIC:
> +                qemu_mutex_unlock_iothread();
> +                cpu_exec_step_atomic(cpu);
> +                qemu_mutex_lock_iothread();
>              default:
>                  /* Ignore everything else? */
>                  break;


--
Alex Bennée
Pranith Kumar Feb. 21, 2017, 4:05 a.m. UTC | #2
On Mon, Feb 20, 2017 at 7:35 PM, Alex Bennée <alex.bennee@linaro.org> wrote:
>
> Pranith Kumar <bobby.prani@gmail.com> writes:
>
>> Alex Bennée writes:
>>
>>> Pranith Kumar <bobby.prani@gmail.com> writes:
>>>
>>>> tb_gen_code() can exit execution using cpu_loop_exit() when it cannot
>>>> allocate new tb's. To handle this, we need to properly set the jmp_env
>>>> pointer ahead of calling tb_gen_code().
>>>>
>>>> CC: Alex Bennée <alex.bennee@linaro.org>
>>>> CC: Richard Henderson <rth@twiddle.net>
>>>> Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
>>>> ---
>>>>  cpu-exec.c | 23 +++++++++++------------
>>>>  1 file changed, 11 insertions(+), 12 deletions(-)
>>>>
>>>> diff --git a/cpu-exec.c b/cpu-exec.c
>>>> index 97d79612d9..4b70988b24 100644
>>>> --- a/cpu-exec.c
>>>> +++ b/cpu-exec.c
>>>> @@ -236,23 +236,22 @@ static void cpu_exec_step(CPUState *cpu)
>>>>
>>>>      cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
>>>>      tb_lock();
>>>> -    tb = tb_gen_code(cpu, pc, cs_base, flags,
>>>> -                     1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
>>>> -    tb->orig_tb = NULL;
>>>> -    tb_unlock();
>>>> -
>>>> -    cc->cpu_exec_enter(cpu);
>>>> -
>>>
>>> It occurs to me we are also diverging in our locking pattern from
>>> tb_find which takes mmap_lock first. This is a NOP for system emulation
>>> but needed for user-emulation (for which we can do cpu_exec_step but not
>>> cpu_exec_nocache).
>>
>> Right. So we have to take the mmap_lock() before calling
>> tb_gen_code(). However, this lock is released in the error path before calling
>> cpu_loop_exit() if allocation of a new tb fails. The following is what I have
>> after merging with the previous EXCP_ATOMIC handling patch.
>
> Hmm, we are inside a start/end_exclusive() section though, so what could we
> be racing against that also wants the mmap_lock()? Could it be held by
> anything at this point, given every other vCPU had to be kicked out of its
> execution loop first?
>

No, I don't think any other thread holds the mmap/tb lock at this
point. However, we still need to take this lock since the functions we
are calling expect us to hold it and will assert if we don't.
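
(Concretely, these are the assert_memory_lock()-style checks in
translate-all.c; in a user-mode build they boil down to roughly the
following, paraphrased from memory rather than quoted:)

    /* tb_gen_code() and tb_phys_invalidate() verify that the caller
     * holds the mmap lock, even when no contention is possible. */
    #ifdef CONFIG_USER_ONLY
    # define assert_memory_lock() g_assert(have_mmap_lock())
    #else
    # define assert_memory_lock() do { } while (0)  /* softmmu */
    #endif

So even though the exclusive section rules out contention, skipping
mmap_lock() would still trip these assertions.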
Alex Bennée Feb. 21, 2017, 3:04 p.m. UTC | #3
Pranith Kumar <bobby.prani@gmail.com> writes:

> Alex Bennée writes:
>
>> Pranith Kumar <bobby.prani@gmail.com> writes:
>>
>>> tb_gen_code() can exit execution using cpu_loop_exit() when it cannot
>>> allocate new tb's. To handle this, we need to properly set the jmp_env
>>> pointer ahead of calling tb_gen_code().
>>>
>>> CC: Alex Bennée <alex.bennee@linaro.org>
>>> CC: Richard Henderson <rth@twiddle.net>
>>> Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
>>> ---
>>>  cpu-exec.c | 23 +++++++++++------------
>>>  1 file changed, 11 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/cpu-exec.c b/cpu-exec.c
>>> index 97d79612d9..4b70988b24 100644
>>> --- a/cpu-exec.c
>>> +++ b/cpu-exec.c
>>> @@ -236,23 +236,22 @@ static void cpu_exec_step(CPUState *cpu)
>>>
>>>      cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
>>>      tb_lock();
>>> -    tb = tb_gen_code(cpu, pc, cs_base, flags,
>>> -                     1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
>>> -    tb->orig_tb = NULL;
>>> -    tb_unlock();
>>> -
>>> -    cc->cpu_exec_enter(cpu);
>>> -
>>
>> It occurs to me we are also diverging in our locking pattern from
>> tb_find which takes mmap_lock first. This is a NOP for system emulation
>> but needed for user-emulation (for which we can do cpu_exec_step but not
>> cpu_exec_nocache).
>
> Right. So we have to take the mmap_lock() before calling
> tb_gen_code(). However, this lock is released in the error path before calling
> cpu_loop_exit() if allocation of a new tb fails. The following is what I have
> after merging with the previous EXCP_ATOMIC handling patch.
>
> diff --git a/cpu-exec.c b/cpu-exec.c
> index a8e04bffbf..2bb3ba3672 100644
> --- a/cpu-exec.c
> +++ b/cpu-exec.c
> @@ -228,6 +228,7 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
>
>  static void cpu_exec_step(CPUState *cpu)
>  {
> +    CPUClass *cc = CPU_GET_CLASS(cpu);
>      CPUArchState *env = (CPUArchState *)cpu->env_ptr;
>      TranslationBlock *tb;
>      target_ulong cs_base, pc;
> @@ -235,16 +236,24 @@ static void cpu_exec_step(CPUState *cpu)
>
>      cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
>      tb_lock();
> -    tb = tb_gen_code(cpu, pc, cs_base, flags,
> -                     1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
> -    tb->orig_tb = NULL;
> -    tb_unlock();
> -    /* execute the generated code */
> -    trace_exec_tb_nocache(tb, pc);
> -    cpu_tb_exec(cpu, tb);
> -    tb_lock();
> -    tb_phys_invalidate(tb, -1);
> -    tb_free(tb);
> +    if (sigsetjmp(cpu->jmp_env, 0) == 0) {
> +        mmap_lock();

That gets the locking order the wrong way around - I'm wary of that.
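
(For comparison, tb_find() nests the locks the other way around; a
sketch of the relevant lines from cpu-exec.c, trimmed:)

    mmap_lock();
    tb_lock();
    tb = tb_gen_code(cpu, pc, cs_base, flags, 0);
    tb_unlock();
    mmap_unlock();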

> +        tb = tb_gen_code(cpu, pc, cs_base, flags,
> +                         1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
> +        tb->orig_tb = NULL;
> +        mmap_unlock();
> +        tb_unlock();
> +
> +        cc->cpu_exec_enter(cpu);
> +        /* execute the generated code */
> +        trace_exec_tb_nocache(tb, pc);
> +        cpu_tb_exec(cpu, tb);
> +        cc->cpu_exec_exit(cpu);
> +
> +        tb_lock();
> +        tb_phys_invalidate(tb, -1);
> +        tb_free(tb);
> +    }
>      tb_unlock();
>  }
>
> diff --git a/cpus.c b/cpus.c
> index 77bba08f9a..b39408b4b1 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -1347,6 +1347,11 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
>                  if (r == EXCP_DEBUG) {
>                      cpu_handle_guest_debug(cpu);
>                      break;
> +                } else if (r == EXCP_ATOMIC) {
> +                    qemu_mutex_unlock_iothread();
> +                    cpu_exec_step_atomic(cpu);
> +                    qemu_mutex_lock_iothread();
> +                    break;
>                  }
>              } else if (cpu->stop) {
>                  if (cpu->unplug) {
> @@ -1457,6 +1462,10 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
>                   */
>                  g_assert(cpu->halted);
>                  break;
> +            case EXCP_ATOMIC:
> +                qemu_mutex_unlock_iothread();
> +                cpu_exec_step_atomic(cpu);
> +                qemu_mutex_lock_iothread();
>              default:
>                  /* Ignore everything else? */
>                  break;


--
Alex Bennée
Pranith Kumar Feb. 21, 2017, 4:17 p.m. UTC | #4
Alex Bennée writes:

> Pranith Kumar <bobby.prani@gmail.com> writes:
>
>> Alex Bennée writes:
>>
>>> Pranith Kumar <bobby.prani@gmail.com> writes:
>>>
>>>> tb_gen_code() can exit execution using cpu_loop_exit() when it cannot
>>>> allocate new tb's. To handle this, we need to properly set the jmp_env
>>>> pointer ahead of calling tb_gen_code().
>>>>
>>>> CC: Alex Bennée <alex.bennee@linaro.org>
>>>> CC: Richard Henderson <rth@twiddle.net>
>>>> Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
>>>> ---
>>>>  cpu-exec.c | 23 +++++++++++------------
>>>>  1 file changed, 11 insertions(+), 12 deletions(-)
>>>>
>>>> diff --git a/cpu-exec.c b/cpu-exec.c
>>>> index 97d79612d9..4b70988b24 100644
>>>> --- a/cpu-exec.c
>>>> +++ b/cpu-exec.c
>>>> @@ -236,23 +236,22 @@ static void cpu_exec_step(CPUState *cpu)
>>>>
>>>>      cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
>>>>      tb_lock();
>>>> -    tb = tb_gen_code(cpu, pc, cs_base, flags,
>>>> -                     1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
>>>> -    tb->orig_tb = NULL;
>>>> -    tb_unlock();
>>>> -
>>>> -    cc->cpu_exec_enter(cpu);
>>>> -
>>>
>>> It occurs to me we are also diverging in our locking pattern from
>>> tb_find which takes mmap_lock first. This is a NOP for system emulation
>>> but needed for user-emulation (for which we can do cpu_exec_step but not
>>> cpu_exec_nocache).
>>
>> Right. So we have to take the mmap_lock() before calling
>> tb_gen_code(). However, this lock is released in the error path before calling
>> cpu_loop_exit() if allocation of a new tb fails. The following is what I have
>> after merging with the previous EXCP_ATOMIC handling patch.
>>
>> diff --git a/cpu-exec.c b/cpu-exec.c
>> index a8e04bffbf..2bb3ba3672 100644
>> --- a/cpu-exec.c
>> +++ b/cpu-exec.c
>> @@ -228,6 +228,7 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
>>
>>  static void cpu_exec_step(CPUState *cpu)
>>  {
>> +    CPUClass *cc = CPU_GET_CLASS(cpu);
>>      CPUArchState *env = (CPUArchState *)cpu->env_ptr;
>>      TranslationBlock *tb;
>>      target_ulong cs_base, pc;
>> @@ -235,16 +236,24 @@ static void cpu_exec_step(CPUState *cpu)
>>
>>      cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
>>      tb_lock();
>> -    tb = tb_gen_code(cpu, pc, cs_base, flags,
>> -                     1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
>> -    tb->orig_tb = NULL;
>> -    tb_unlock();
>> -    /* execute the generated code */
>> -    trace_exec_tb_nocache(tb, pc);
>> -    cpu_tb_exec(cpu, tb);
>> -    tb_lock();
>> -    tb_phys_invalidate(tb, -1);
>> -    tb_free(tb);
>> +    if (sigsetjmp(cpu->jmp_env, 0) == 0) {
>> +        mmap_lock();
>
> That gets the locking order the wrong way around - I'm wary of that.
>

But we are in exclusive execution now, and we release all the locks taken
before coming out of it. I think that should be OK. If your concern is that
this is not the normal locking pattern, then I agree. Otherwise, it should
be fine.
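
(The exclusive execution referred to here comes from the
cpu_exec_step_atomic() wrapper introduced by the earlier EXCP_ATOMIC
patch; roughly, as a sketch of that series rather than the final
commit:)

    void cpu_exec_step_atomic(CPUState *cpu)
    {
        start_exclusive();       /* every other vCPU is now stopped */
        parallel_cpus = false;   /* translate for serial execution */
        cpu_exec_step(cpu);      /* the sigsetjmp-guarded body below */
        parallel_cpus = true;
        end_exclusive();         /* release the other vCPUs */
    }

Since both mmap_lock and tb_lock are dropped again before
end_exclusive(), no lock can leak out of the section.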

Patch

diff --git a/cpu-exec.c b/cpu-exec.c
index a8e04bffbf..2bb3ba3672 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -228,6 +228,7 @@  static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
 
 static void cpu_exec_step(CPUState *cpu)
 {
+    CPUClass *cc = CPU_GET_CLASS(cpu);
     CPUArchState *env = (CPUArchState *)cpu->env_ptr;
     TranslationBlock *tb;
     target_ulong cs_base, pc;
@@ -235,16 +236,24 @@  static void cpu_exec_step(CPUState *cpu)
 
     cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
     tb_lock();
-    tb = tb_gen_code(cpu, pc, cs_base, flags,
-                     1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
-    tb->orig_tb = NULL;
-    tb_unlock();
-    /* execute the generated code */
-    trace_exec_tb_nocache(tb, pc);
-    cpu_tb_exec(cpu, tb);
-    tb_lock();
-    tb_phys_invalidate(tb, -1);
-    tb_free(tb);
+    if (sigsetjmp(cpu->jmp_env, 0) == 0) {
+        mmap_lock();
+        tb = tb_gen_code(cpu, pc, cs_base, flags,
+                         1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
+        tb->orig_tb = NULL;
+        mmap_unlock();
+        tb_unlock();
+
+        cc->cpu_exec_enter(cpu);
+        /* execute the generated code */
+        trace_exec_tb_nocache(tb, pc);
+        cpu_tb_exec(cpu, tb);
+        cc->cpu_exec_exit(cpu);
+
+        tb_lock();
+        tb_phys_invalidate(tb, -1);
+        tb_free(tb);
+    }
     tb_unlock();
 }
 
diff --git a/cpus.c b/cpus.c
index 77bba08f9a..b39408b4b1 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1347,6 +1347,11 @@  static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
                 if (r == EXCP_DEBUG) {
                     cpu_handle_guest_debug(cpu);
                     break;
+                } else if (r == EXCP_ATOMIC) {
+                    qemu_mutex_unlock_iothread();
+                    cpu_exec_step_atomic(cpu);
+                    qemu_mutex_lock_iothread();
+                    break;
                 }
             } else if (cpu->stop) {
                 if (cpu->unplug) {
@@ -1457,6 +1462,10 @@  static void *qemu_tcg_cpu_thread_fn(void *arg)
                  */
                 g_assert(cpu->halted);
                 break;
+            case EXCP_ATOMIC:
+                qemu_mutex_unlock_iothread();
+                cpu_exec_step_atomic(cpu);
+                qemu_mutex_lock_iothread();
             default:
                 /* Ignore everything else? */
                 break;
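
(Background on the new return code handled in these hunks: EXCP_ATOMIC
is raised from generated code when the backend cannot implement an
atomic operation inline. The raising side, paraphrased from
cpu-exec-common.c of the same series:)

    /* Unwind out of the current TB and report EXCP_ATOMIC to the
     * exec loop, which then calls cpu_exec_step_atomic() as above. */
    void cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc)
    {
        cpu->exception_index = EXCP_ATOMIC;
        cpu_loop_exit_restore(cpu, pc);  /* siglongjmp via cpu->jmp_env */
    }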