Message ID | 1402652437-13194-4-git-send-email-sebastian.tanase@openwide.fr |
---|---|
State | New |
Headers | show |
Il 13/06/2014 11:40, Sebastian Tanase ha scritto: > The goal is to sleep qemu whenever the guest clock > is in advance compared to the host clock (we use > the monotonic clocks). The amount of time to sleep > is calculated in the execution loop in cpu_exec. > > Basically, using QEMU_CLOCK_REALTIME, we calculate > the real time duration of the execution (meaning > generating TBs and executing them) and using > the fields icount_decr.u16.low and icount_extra > (from the CPUState structure) shifted by icount_time_shift > we calculate the theoretical virtual time elapsed. > Having these 2 values, we can determine if the > guest is in advance and sleep. > > Signed-off-by: Sebastian Tanase <sebastian.tanase@openwide.fr> > Tested-by: Camille Bégué <camille.begue@openwide.fr> > --- > > At first, we tried to approximate at each for loop the real time elapsed > while searching for a TB (generating or retrieving from cache) and > executing it. We would then approximate the virtual time corresponding > to the number of virtual instructions executed. The difference between > these 2 values would allow us to know if the guest is in advance or delayed. > However, the function used for measuring the real time > (qemu_clock_get_ns(QEMU_CLOCK_REALTIME)) proved to be very expensive. > We had an added overhead of 13% of the total run time. > > Therefore, we modified the algorithm and only take into account the > difference between the 2 clocks at the beginning of the cpu_exec function. > During the for loop we try to reduce the advance of the guest only by > computing the virtual time elapsed and sleeping if necessary. The overhead > is thus reduced to 3%. Even though this method still has a noticeable > overhead, it no longer is a bottleneck in trying to achieve a better > guest frequency for which the guest clock is faster than the host one. 
> > As for the alignment of the 2 clocks, with the first algorithm > the guest clock was oscillating between -1 and 1ms compared to the host clock. > Using the second algorithm we notice that the guest is 5ms behind the host, which > is still acceptable for our use case. > > The tests were conducted using fio and stress. The host machine is an i5 CPU at > 3.10GHz running Debian Jessie (kernel 3.12). The guest machine is an arm versatile-pb built > with buildroot. > > Currently, on our test machine, the lowest icount we can achieve that is suitable for > aligning the 2 clocks is 6. However, we observe that the IO tests (using fio) are > slower than the cpu tests (using stress). You can place this text in the commit message. > --- > cpu-exec.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 77 insertions(+), 1 deletion(-) > > diff --git a/cpu-exec.c b/cpu-exec.c > index 38e5f02..ccc2c0e 100644 > --- a/cpu-exec.c > +++ b/cpu-exec.c > @@ -22,6 +22,7 @@ > #include "tcg.h" > #include "qemu/atomic.h" > #include "sysemu/qtest.h" > +#include "qemu/timer.h" > > void cpu_loop_exit(CPUState *cpu) > { > @@ -209,6 +210,55 @@ static void cpu_handle_debug_exception(CPUArchState *env) > } > } > > +#if !(defined(CONFIG_USER_ONLY)) No need for ( ) here around defined. > +/* Allow the guest to have a max 3ms advance. > + * The difference between the 2 clocks could therefore > + * oscillate around 0. 
> + */ > +#define VM_CLOCK_ADVANCE 3000000 > + > +static int64_t delay_host(int64_t diff_clk) > +{ > + struct timespec sleep_delay, rem_delay; > + if (diff_clk > VM_CLOCK_ADVANCE) { > + sleep_delay.tv_sec = diff_clk / 1000000000LL; > + sleep_delay.tv_nsec = diff_clk % 1000000000LL; > + if (nanosleep(&sleep_delay, &rem_delay) < 0) { > + diff_clk -= (sleep_delay.tv_sec - rem_delay.tv_sec) * 1000000000LL; > + diff_clk -= sleep_delay.tv_nsec - rem_delay.tv_nsec; > + } else { > + diff_clk = 0; > + } > + } > + return diff_clk; > +} > + > +static int64_t update_clock_difference(int64_t diff_clk, > + int64_t *instr_counter, > + CPUState *cpu) > +{ > + int64_t instr_exec_time; > + instr_exec_time = *instr_counter - > + (cpu->icount_extra + > + cpu->icount_decr.u16.low); > + instr_exec_time = instr_exec_time << icount_time_shift; > + *instr_counter = cpu->icount_extra + cpu->icount_decr.u16.low; > + > + return diff_clk + instr_exec_time; > +} > + > +static int64_t align_clocks(int64_t diff_clk, int64_t *oic, CPUState *cpu) > +{ > + if (icount_align_option) { > + diff_clk = update_clock_difference(diff_clk, oic, cpu); > + diff_clk = delay_host(diff_clk); > + } > + > + return diff_clk; > +} > +#endif /* CONFIG USER ONLY */ > + > /* main execution loop */ > > volatile sig_atomic_t exit_request; > @@ -227,6 +277,10 @@ int cpu_exec(CPUArchState *env) > TranslationBlock *tb; > uint8_t *tc_ptr; > uintptr_t next_tb; > +#if !(defined(CONFIG_USER_ONLY)) > + /* Delay algorithm */ > + int64_t diff_clk, original_instr_counter; > +#endif Perhaps these could become a struct too (an abstract data type with init and realign operations), with a dummy implementation for CONFIG_USER_ONLY. The compiler will remove the dead code. 
> /* This must be volatile so it is not trashed by longjmp() */ > volatile bool have_tb_lock = false; > > @@ -282,7 +336,17 @@ int cpu_exec(CPUArchState *env) > #error unsupported target CPU > #endif > cpu->exception_index = -1; > - > +#if !(defined(CONFIG_USER_ONLY)) > + if (icount_align_option) { > + /* Calculate difference between guest clock and host clock. > + This delay includes the delay of the last cycle, so > + what we have to do is sleep until it is 0. As for the > + advance/delay we gain here, we try to fix it next time. */ > + diff_clk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - > + qemu_clock_get_ns(QEMU_CLOCK_REALTIME); This doesn't work yet, does it? Let's look for a less hacky solution to the problem you fix in patch 4, and squash it in this patch. Paolo > + original_instr_counter = cpu->icount_extra + cpu->icount_decr.u16.low; > + } > +#endif > /* prepare setjmp context for exception handling */ > for(;;) { > if (sigsetjmp(cpu->jmp_env, 0) == 0) { > @@ -672,6 +736,13 @@ int cpu_exec(CPUArchState *env) > if (insns_left > 0) { > /* Execute remaining instructions. */ > cpu_exec_nocache(env, insns_left, tb); > +#if !(defined(CONFIG_USER_ONLY)) > + /* We can ignore the return value > + because we exit anyway. */ > + align_clocks(diff_clk, > + &original_instr_counter, > + cpu); > +#endif > } > cpu->exception_index = EXCP_INTERRUPT; > next_tb = 0; > @@ -684,6 +755,11 @@ int cpu_exec(CPUArchState *env) > } > } > cpu->current_tb = NULL; > +#if !(defined(CONFIG_USER_ONLY)) > + diff_clk = align_clocks(diff_clk, > + &original_instr_counter, > + cpu); > +#endif > /* reset soft MMU for next block (it can currently > only be set by a memory fault) */ > } /* for(;;) */ >
diff --git a/cpu-exec.c b/cpu-exec.c index 38e5f02..ccc2c0e 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -22,6 +22,7 @@ #include "tcg.h" #include "qemu/atomic.h" #include "sysemu/qtest.h" +#include "qemu/timer.h" void cpu_loop_exit(CPUState *cpu) { @@ -209,6 +210,55 @@ static void cpu_handle_debug_exception(CPUArchState *env) } } +#if !(defined(CONFIG_USER_ONLY)) +/* Allow the guest to have a max 3ms advance. + * The difference between the 2 clocks could therefore + * oscillate around 0. + */ +#define VM_CLOCK_ADVANCE 3000000 + +static int64_t delay_host(int64_t diff_clk) +{ + struct timespec sleep_delay, rem_delay; + if (diff_clk > VM_CLOCK_ADVANCE) { + sleep_delay.tv_sec = diff_clk / 1000000000LL; + sleep_delay.tv_nsec = diff_clk % 1000000000LL; + if (nanosleep(&sleep_delay, &rem_delay) < 0) { + diff_clk -= (sleep_delay.tv_sec - rem_delay.tv_sec) * 1000000000LL; + diff_clk -= sleep_delay.tv_nsec - rem_delay.tv_nsec; + } else { + diff_clk = 0; + } + } + return diff_clk; +} + +static int64_t update_clock_difference(int64_t diff_clk, + int64_t *instr_counter, + CPUState *cpu) +{ + int64_t instr_exec_time; + instr_exec_time = *instr_counter - + (cpu->icount_extra + + cpu->icount_decr.u16.low); + instr_exec_time = instr_exec_time << icount_time_shift; + *instr_counter = cpu->icount_extra + cpu->icount_decr.u16.low; + + return diff_clk + instr_exec_time; +} + +static int64_t align_clocks(int64_t diff_clk, int64_t *oic, CPUState *cpu) +{ + if (icount_align_option) { + diff_clk = update_clock_difference(diff_clk, oic, cpu); + diff_clk = delay_host(diff_clk); + } + + return diff_clk; +} + +#endif /* CONFIG USER ONLY */ + /* main execution loop */ volatile sig_atomic_t exit_request; @@ -227,6 +277,10 @@ int cpu_exec(CPUArchState *env) TranslationBlock *tb; uint8_t *tc_ptr; uintptr_t next_tb; +#if !(defined(CONFIG_USER_ONLY)) + /* Delay algorithm */ + int64_t diff_clk, original_instr_counter; +#endif /* This must be volatile so it is not trashed by longjmp() */ volatile bool 
have_tb_lock = false; @@ -282,7 +336,17 @@ int cpu_exec(CPUArchState *env) #error unsupported target CPU #endif cpu->exception_index = -1; - +#if !(defined(CONFIG_USER_ONLY)) + if (icount_align_option) { + /* Calculate difference between guest clock and host clock. + This delay includes the delay of the last cycle, so + what we have to do is sleep until it is 0. As for the + advance/delay we gain here, we try to fix it next time. */ + diff_clk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - + qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + original_instr_counter = cpu->icount_extra + cpu->icount_decr.u16.low; + } +#endif /* prepare setjmp context for exception handling */ for(;;) { if (sigsetjmp(cpu->jmp_env, 0) == 0) { @@ -672,6 +736,13 @@ int cpu_exec(CPUArchState *env) if (insns_left > 0) { /* Execute remaining instructions. */ cpu_exec_nocache(env, insns_left, tb); +#if !(defined(CONFIG_USER_ONLY)) + /* We can ignore the return value + because we exit anyway. */ + align_clocks(diff_clk, + &original_instr_counter, + cpu); +#endif } cpu->exception_index = EXCP_INTERRUPT; next_tb = 0; @@ -684,6 +755,11 @@ int cpu_exec(CPUArchState *env) } } cpu->current_tb = NULL; +#if !(defined(CONFIG_USER_ONLY)) + diff_clk = align_clocks(diff_clk, + &original_instr_counter, + cpu); +#endif /* reset soft MMU for next block (it can currently only be set by a memory fault) */ } /* for(;;) */