diff mbox

[v8,2/9] icount: exit cpu loop on expire

Message ID 20170126123423.5412.2388.stgit@PASHA-ISP
State New
Headers show

Commit Message

Pavel Dovgalyuk Jan. 26, 2017, 12:34 p.m. UTC
This patch adds check to break cpu loop when icount expires without
setting the TB_EXIT_ICOUNT_EXPIRED flag. It happens when there is no
available translated blocks and all instructions were executed.
In icount replay mode unnecessary tb_find will be called (which may
cause an exception) and execution will be non-deterministic.

v8: refactored loop exit code and moved it to separate function

Signed-off-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
---
 cpu-exec.c |   24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

Comments

Paolo Bonzini Jan. 26, 2017, 1:02 p.m. UTC | #1
On 26/01/2017 13:34, Pavel Dovgalyuk wrote:
> This patch adds check to break cpu loop when icount expires without
> setting the TB_EXIT_ICOUNT_EXPIRED flag. It happens when there is no
> available translated blocks and all instructions were executed.
> In icount replay mode unnecessary tb_find will be called (which may
> cause an exception) and execution will be non-deterministic.
> 
> v8: refactored loop exit code and moved it to separate function
> 
> Signed-off-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
> ---
>  cpu-exec.c |   24 +++++++++++++++++++-----
>  1 file changed, 19 insertions(+), 5 deletions(-)
> 
> diff --git a/cpu-exec.c b/cpu-exec.c
> index fa08c73..f9b8ec8 100644
> --- a/cpu-exec.c
> +++ b/cpu-exec.c
> @@ -523,9 +523,25 @@ static inline void cpu_handle_interrupt(CPUState *cpu,
>              *last_tb = NULL;
>          }
>      }
> -    if (unlikely(atomic_read(&cpu->exit_request) || replay_has_interrupt())) {
> +}
> +
> +
> +static void cpu_check_loop_exit(CPUState *cpu)
> +{
> +    if (unlikely(atomic_read(&cpu->exit_request)
> +        /* icount has expired, we need to break the execution loop.
> +           This check is needed before tb_find to make execution
> +           deterministic - tb_find may cause an exception
> +           while translating the code from non-mapped page. */
> +        || (use_icount && ((cpu->icount_extra == 0
> +                        && cpu->icount_decr.u16.low == 0)
> +                    || (int32_t)cpu->icount_decr.u32 < 0)))) {

Simpler:

	use_icount &&
	((int32_t)cpu->icount_decr.u32 < 0 ||
	 cpu->icount_decr.low + cpu->icount_extra == 0)

But I'm not sure that you need to test u32.  After all you're not
testing tcg_exit_req, which is the equivalent when icount is disabled.

>          atomic_set(&cpu->exit_request, 0);
> -        cpu->exception_index = EXCP_INTERRUPT;
> +        /* If there is an exception that wasn't replayed yet,
> +           don't change exception_index. */
> +        if (cpu->exception_index == -1) {
> +            cpu->exception_index = EXCP_INTERRUPT;
> +        }
>          cpu_loop_exit(cpu);

The siglongjmp is effectively the same as exiting the for(;;) loop of
cpu_exec and going back to cpu_handle_exception.  So I would just merge
this with cpu_handle_interrupt, which exits a lot with cpu_loop_exit too.

All cpu_loop_exit() calls in cpu_handle_interrupt become "return true",
similar to cpu_handle_exception, and then in cpu_exec you have:

            /* if an exception is pending, we execute it here */
            while (!cpu_handle_exception(cpu, &ret)) {
                /* if an interrupt is pending, inject it and go back
                 * to cpu_handle_exception.
                 */
                while (!cpu_handle_interrupt(cpu, &last_tb)) {
                    tb = tb_find(cpu, last_tb, tb_exit);
                    cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit, &sc);
                    /* Try to align the host and virtual clocks
                       if the guest is in advance */
                    align_clocks(&sc, cpu);
                }
            }
            break;



>  #endif
> @@ -634,6 +647,7 @@ int cpu_exec(CPUState *cpu)
>  
>              for(;;) {
>                  cpu_handle_interrupt(cpu, &last_tb);
> +                cpu_check_loop_exit(cpu);


	if (cpu_should_check_exception_or_exit(cpu)) {
	    break;
	}


>                  tb = tb_find(cpu, last_tb, tb_exit);
>                  cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit, &sc);
>                  /* Try to align the host and virtual clocks
> 
> 
>
Pavel Dovgalyuk Jan. 26, 2017, 1:37 p.m. UTC | #2
> From: Paolo Bonzini [mailto:paolo.bonzini@gmail.com] On Behalf Of Paolo Bonzini
> On 26/01/2017 13:34, Pavel Dovgalyuk wrote:
> > This patch adds check to break cpu loop when icount expires without
> > setting the TB_EXIT_ICOUNT_EXPIRED flag. It happens when there is no
> > available translated blocks and all instructions were executed.
> > In icount replay mode unnecessary tb_find will be called (which may
> > cause an exception) and execution will be non-deterministic.
> >
> > v8: refactored loop exit code and moved it to separate function
> >
> > Signed-off-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
> > ---
> >  cpu-exec.c |   24 +++++++++++++++++++-----
> >  1 file changed, 19 insertions(+), 5 deletions(-)
> >
> > diff --git a/cpu-exec.c b/cpu-exec.c
> > index fa08c73..f9b8ec8 100644
> > --- a/cpu-exec.c
> > +++ b/cpu-exec.c
> > @@ -523,9 +523,25 @@ static inline void cpu_handle_interrupt(CPUState *cpu,
> >              *last_tb = NULL;
> >          }
> >      }
> > -    if (unlikely(atomic_read(&cpu->exit_request) || replay_has_interrupt())) {
> > +}
> > +
> > +
> > +static void cpu_check_loop_exit(CPUState *cpu)
> > +{
> > +    if (unlikely(atomic_read(&cpu->exit_request)
> > +        /* icount has expired, we need to break the execution loop.
> > +           This check is needed before tb_find to make execution
> > +           deterministic - tb_find may cause an exception
> > +           while translating the code from non-mapped page. */
> > +        || (use_icount && ((cpu->icount_extra == 0
> > +                        && cpu->icount_decr.u16.low == 0)
> > +                    || (int32_t)cpu->icount_decr.u32 < 0)))) {
> 
> Simpler:
> 
> 	use_icount &&
> 	((int32_t)cpu->icount_decr.u32 < 0 ||
> 	 cpu->icount_decr.low + cpu->icount_extra == 0)

Right.

> But I'm not sure that you need to test u32.  After all you're not

Checking u32 is needed, because sometimes it is less than zero.
Consider the code in cpu_loop_exec_tb:
        int insns_left = cpu->icount_decr.u32;
            if (insns_left > 0) {
It is set to negative in tcg_handle_interrupt.
Ten days ago you also offered setting high bits of u32 in gen_icount:

+    /* Make the next TB exit immediately with TB_EXIT_ICOUNT_EXPIRED.  */
+    tcg_gen_st16_i32(-1, cpu_env,
+                     -ENV_OFFSET + offsetof(CPUState, icount_decr.u16.high));



> testing tcg_exit_req, which is the equivalent when icount is disabled.

Yes, if we want to refactor the whole loop.

> >          atomic_set(&cpu->exit_request, 0);
> > -        cpu->exception_index = EXCP_INTERRUPT;
> > +        /* If there is an exception that wasn't replayed yet,
> > +           don't change exception_index. */
> > +        if (cpu->exception_index == -1) {
> > +            cpu->exception_index = EXCP_INTERRUPT;
> > +        }
> >          cpu_loop_exit(cpu);
> 
> The siglongjmp is effectively the same as exiting the for(;;) loop of
> cpu_exec and going back to cpu_handle_exception.  So I would just merge
> this with cpu_handle_interrupt, which exits a lot with cpu_loop_exit too.

Do you want me to create new version of the patch, which changes the loop this way?

> All cpu_loop_exit() calls in cpu_handle_interrupt become "return true",
> similar to cpu_handle_exception, and then in cpu_exec you have:
> 
>             /* if an exception is pending, we execute it here */
>             while (!cpu_handle_exception(cpu, &ret)) {
>                 /* if an interrupt is pending, inject it and go back
>                  * to cpu_handle_exception.
>                  */
>                 while (!cpu_handle_interrupt(cpu, &last_tb)) {
>                     tb = tb_find(cpu, last_tb, tb_exit);
>                     cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit, &sc);
>                     /* Try to align the host and virtual clocks
>                        if the guest is in advance */
>                     align_clocks(&sc, cpu);
>                 }
>             }
>             break;

It might even work faster without excessive cpu_loop_exit calls.

Pavel Dovgalyuk
Paolo Bonzini Jan. 26, 2017, 2:28 p.m. UTC | #3
On 26/01/2017 14:37, Pavel Dovgalyuk wrote:
>> Simpler:
>>
>> 	use_icount &&
>> 	((int32_t)cpu->icount_decr.u32 < 0 ||
>> 	 cpu->icount_decr.u16.low + cpu->icount_extra == 0)
> Right.
> 
>> But I'm not sure that you need to test u32.  After all you're not
> Checking u32 is needed, because sometimes it is less than zero.

If cpu->icount_decr.u32 is less than zero, the next translation block
would immediately exit with TB_EXIT_ICOUNT_EXPIRED, causing

            cpu->exception_index = EXCP_INTERRUPT;
            *last_tb = NULL;
            cpu_loop_exit(cpu);

from cpu_loop_exec_tb's "case TB_EXIT_ICOUNT_EXPIRED".

And the same is true for cpu->icount_decr.u16.low + cpu->icount_extra ==
0, so I don't understand why this part of the patch is necessary.

Paolo
Pavel Dovgalyuk Jan. 26, 2017, 2:32 p.m. UTC | #4
> From: Paolo Bonzini [mailto:pbonzini@redhat.com]
> On 26/01/2017 14:37, Pavel Dovgalyuk wrote:
> >> Simpler:
> >>
> >> 	use_icount &&
> >> 	((int32_t)cpu->icount_decr.u32 < 0 ||
> >> 	 cpu->icount_decr.u16.low + cpu->icount_extra == 0)
> > Right.
> >
> >> But I'm not sure that you need to test u32.  After all you're not
> > Checking u32 is needed, because sometimes it is less than zero.
> 
> If cpu->icount_decr.u32 is less than zero, the next translation block
> would immediately exit with TB_EXIT_ICOUNT_EXPIRED, causing
> 
>             cpu->exception_index = EXCP_INTERRUPT;
>             *last_tb = NULL;
>             cpu_loop_exit(cpu);
> 
> from cpu_loop_exec_tb's "case TB_EXIT_ICOUNT_EXPIRED".
> 
> And the same is true for cpu->icount_decr.u16.low + cpu->icount_extra ==
> 0, so I don't understand why this part of the patch is necessary.

I removed that lines because we have to check icount=0 not only when it is expired,
but also when all instructions were executed successfully. 
If there are no instructions to execute, calling tb_find (and translation then)
may cause an exception at the wrong moment.

Pavel Dovgalyuk
Paolo Bonzini Jan. 26, 2017, 2:44 p.m. UTC | #5
On 26/01/2017 15:32, Pavel Dovgalyuk wrote:
>> From: Paolo Bonzini [mailto:pbonzini@redhat.com]
>> On 26/01/2017 14:37, Pavel Dovgalyuk wrote:
>>>> Simpler:
>>>>
>>>> 	use_icount &&
>>>> 	((int32_t)cpu->icount_decr.u32 < 0 ||
>>>> 	 cpu->icount_decr.u16.low + cpu->icount_extra == 0)
>>> Right.
>>>
>>>> But I'm not sure that you need to test u32.  After all you're not
>>> Checking u32 is needed, because sometimes it is less than zero.
>>
>> If cpu->icount_decr.u32 is less than zero, the next translation block
>> would immediately exit with TB_EXIT_ICOUNT_EXPIRED, causing
>>
>>             cpu->exception_index = EXCP_INTERRUPT;
>>             *last_tb = NULL;
>>             cpu_loop_exit(cpu);
>>
>> from cpu_loop_exec_tb's "case TB_EXIT_ICOUNT_EXPIRED".
>>
>> And the same is true for cpu->icount_decr.u16.low + cpu->icount_extra ==
>> 0, so I don't understand why this part of the patch is necessary.
> 
> I removed that lines because we have to check icount=0 not only when it is expired,
> but also when all instructions were executed successfully. 
> If there are no instructions to execute, calling tb_find (and translation then)
> may cause an exception at the wrong moment.

Ok, that makes sense for cpu->icount_decr.u16.low + cpu->icount_extra == 0.

But for decr.u32 < 0, the same reasoning of this comment is also true:

        /* Something asked us to stop executing
         * chained TBs; just continue round the main
         * loop. Whatever requested the exit will also
         * have set something else (eg exit_request or
         * interrupt_request) which we will handle
         * next time around the loop.  But we need to
         * ensure the tcg_exit_req read in generated code
         * comes before the next read of cpu->exit_request
         * or cpu->interrupt_request.
         */

(and by the way, this means that you need an smp_rmb in case
TB_EXIT_ICOUNT_EXPIRED).

Thanks,

Paolo
Pavel Dovgalyuk Jan. 27, 2017, 6:09 a.m. UTC | #6
> From: Paolo Bonzini [mailto:pbonzini@redhat.com]
> On 26/01/2017 15:32, Pavel Dovgalyuk wrote:
> >> From: Paolo Bonzini [mailto:pbonzini@redhat.com]
> >> On 26/01/2017 14:37, Pavel Dovgalyuk wrote:
> >>>> Simpler:
> >>>>
> >>>> 	use_icount &&
> >>>> 	((int32_t)cpu->icount_decr.u32 < 0 ||
> >>>> 	 cpu->icount_decr.u16.low + cpu->icount_extra == 0)
> >>> Right.
> >>>
> >>>> But I'm not sure that you need to test u32.  After all you're not
> >>> Checking u32 is needed, because sometimes it is less than zero.
> >>
> >> If cpu->icount_decr.u32 is less than zero, the next translation block
> >> would immediately exit with TB_EXIT_ICOUNT_EXPIRED, causing
> >>
> >>             cpu->exception_index = EXCP_INTERRUPT;
> >>             *last_tb = NULL;	
> >>             cpu_loop_exit(cpu);
> >>
> >> from cpu_loop_exec_tb's "case TB_EXIT_ICOUNT_EXPIRED".
> >>
> >> And the same is true for cpu->icount_decr.u16.low + cpu->icount_extra ==
> >> 0, so I don't understand why this part of the patch is necessary.
> >
> > I removed that lines because we have to check icount=0 not only when it is expired,
> > but also when all instructions were executed successfully.
> > If there are no instructions to execute, calling tb_find (and translation then)
> > may cause an exception at the wrong moment.
> 
> Ok, that makes sense for cpu->icount_decr.u16.low + cpu->icount_extra == 0.
> 
> But for decr.u32 < 0, the same reasoning of this comment is also true:
> 
>         /* Something asked us to stop executing
>          * chained TBs; just continue round the main
>          * loop. Whatever requested the exit will also
>          * have set something else (eg exit_request or
>          * interrupt_request) which we will handle
>          * next time around the loop.  But we need to
>          * ensure the tcg_exit_req read in generated code
>          * comes before the next read of cpu->exit_request
>          * or cpu->interrupt_request.
>          */

Right. If the following lines will not be removed (as opposite to my patch) then checking
decr.u32 < 0 will not be needed.
-             cpu->exception_index = EXCP_INTERRUPT;
-             *last_tb = NULL;	
-             cpu_loop_exit(cpu);

What is your point about the new version of that patch?

Pavel Dovgalyuk
Paolo Bonzini Jan. 27, 2017, 11:02 a.m. UTC | #7
On 27/01/2017 07:09, Pavel Dovgalyuk wrote:
>> From: Paolo Bonzini [mailto:pbonzini@redhat.com]
>> On 26/01/2017 15:32, Pavel Dovgalyuk wrote:
>>>> From: Paolo Bonzini [mailto:pbonzini@redhat.com]
>>>> On 26/01/2017 14:37, Pavel Dovgalyuk wrote:
>>>>>> Simpler:
>>>>>>
>>>>>> 	use_icount &&
>>>>>> 	((int32_t)cpu->icount_decr.u32 < 0 ||
>>>>>> 	 cpu->icount_decr.u16.low + cpu->icount_extra == 0)
>>>>> Right.
>>>>>
>>>>>> But I'm not sure that you need to test u32.  After all you're not
>>>>> Checking u32 is needed, because sometimes it is less than zero.
>>>>
>>>> If cpu->icount_decr.u32 is less than zero, the next translation block
>>>> would immediately exit with TB_EXIT_ICOUNT_EXPIRED, causing
>>>>
>>>>             cpu->exception_index = EXCP_INTERRUPT;
>>>>             *last_tb = NULL;	
>>>>             cpu_loop_exit(cpu);
>>>>
>>>> from cpu_loop_exec_tb's "case TB_EXIT_ICOUNT_EXPIRED".
>>>>
>>>> And the same is true for cpu->icount_decr.u16.low + cpu->icount_extra ==
>>>> 0, so I don't understand why this part of the patch is necessary.
>>>
>>> I removed that lines because we have to check icount=0 not only when it is expired,
>>> but also when all instructions were executed successfully.
>>> If there are no instructions to execute, calling tb_find (and translation then)
>>> may cause an exception at the wrong moment.
>>
>> Ok, that makes sense for cpu->icount_decr.u16.low + cpu->icount_extra == 0.
>>
>> But for decr.u32 < 0, the same reasoning of this comment is also true:
>>
>>         /* Something asked us to stop executing
>>          * chained TBs; just continue round the main
>>          * loop. Whatever requested the exit will also
>>          * have set something else (eg exit_request or
>>          * interrupt_request) which we will handle
>>          * next time around the loop.  But we need to
>>          * ensure the tcg_exit_req read in generated code
>>          * comes before the next read of cpu->exit_request
>>          * or cpu->interrupt_request.
>>          */
> 
> Right. If the following lines will not be removed (as opposite to my patch) then checking
> decr.u32 < 0 will not be needed.

That's what I'm not sure about.  u32 < 0 is only true if you have set
the interrupt_request as well, but interrupt requests are processed in
cpu_handle_interrupt and that doesn't require going back to the main loop.

Let me try some cleanups early next week and come back to you with a
patch to base your work on.

Paolo

> -             cpu->exception_index = EXCP_INTERRUPT;
> -             *last_tb = NULL;	
> -             cpu_loop_exit(cpu);
> 
> What is your point about the new version of that patch?
> 
> Pavel Dovgalyuk
> 
> 
>
diff mbox

Patch

diff --git a/cpu-exec.c b/cpu-exec.c
index fa08c73..f9b8ec8 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -523,9 +523,25 @@  static inline void cpu_handle_interrupt(CPUState *cpu,
             *last_tb = NULL;
         }
     }
-    if (unlikely(atomic_read(&cpu->exit_request) || replay_has_interrupt())) {
+}
+
+
+static void cpu_check_loop_exit(CPUState *cpu)
+{
+    if (unlikely(atomic_read(&cpu->exit_request)
+        /* icount has expired, we need to break the execution loop.
+           This check is needed before tb_find to make execution
+           deterministic - tb_find may cause an exception
+           while translating the code from non-mapped page. */
+        || (use_icount && ((cpu->icount_extra == 0
+                        && cpu->icount_decr.u16.low == 0)
+                    || (int32_t)cpu->icount_decr.u32 < 0)))) {
         atomic_set(&cpu->exit_request, 0);
-        cpu->exception_index = EXCP_INTERRUPT;
+        /* If there is an exception that wasn't replayed yet,
+           don't change exception_index. */
+        if (cpu->exception_index == -1) {
+            cpu->exception_index = EXCP_INTERRUPT;
+        }
         cpu_loop_exit(cpu);
     }
 }
@@ -578,9 +594,6 @@  static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
                 cpu_exec_nocache(cpu, insns_left, *last_tb, false);
                 align_clocks(sc, cpu);
             }
-            cpu->exception_index = EXCP_INTERRUPT;
-            *last_tb = NULL;
-            cpu_loop_exit(cpu);
         }
         break;
 #endif
@@ -634,6 +647,7 @@  int cpu_exec(CPUState *cpu)
 
             for(;;) {
                 cpu_handle_interrupt(cpu, &last_tb);
+                cpu_check_loop_exit(cpu);
                 tb = tb_find(cpu, last_tb, tb_exit);
                 cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit, &sc);
                 /* Try to align the host and virtual clocks