diff mbox

[v4,11/43] tcg: define CF_PARALLEL and use it for TB hashing along with CF_COUNT_MASK

Message ID 1500616763-26560-2-git-send-email-cota@braap.org
State New
Headers show

Commit Message

Emilio Cota July 21, 2017, 5:59 a.m. UTC
This will enable us to decouple code translation from the value
of parallel_cpus at any given time. It will also help us minimize
TB flushes when generating code via EXCP_ATOMIC.

Note that the declaration of parallel_cpus is brought to exec-all.h
to be able to define there the "curr_cflags" inline.

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 include/exec/exec-all.h   | 20 +++++++++++++++++++-
 include/exec/tb-hash-xx.h |  9 ++++++---
 include/exec/tb-hash.h    |  4 ++--
 include/exec/tb-lookup.h  |  6 +++---
 tcg/tcg.h                 |  1 -
 accel/tcg/cpu-exec.c      | 45 +++++++++++++++++++++++----------------------
 accel/tcg/translate-all.c | 13 +++++++++----
 exec.c                    |  2 +-
 tcg/tcg-runtime.c         |  2 +-
 tests/qht-bench.c         |  2 +-
 10 files changed, 65 insertions(+), 39 deletions(-)

Comments

Richard Henderson July 21, 2017, 9:28 p.m. UTC | #1
On 07/20/2017 07:59 PM, Emilio G. Cota wrote:
> This will enable us to decouple code translation from the value
> of parallel_cpus at any given time. It will also help us minimize
> TB flushes when generating code via EXCP_ATOMIC.
> 
> Note that the declaration of parallel_cpus is brought to exec-all.h
> to be able to define there the "curr_cflags" inline.
> 
> Signed-off-by: Emilio G. Cota<cota@braap.org>
> ---
>   include/exec/exec-all.h   | 20 +++++++++++++++++++-
>   include/exec/tb-hash-xx.h |  9 ++++++---
>   include/exec/tb-hash.h    |  4 ++--
>   include/exec/tb-lookup.h  |  6 +++---
>   tcg/tcg.h                 |  1 -
>   accel/tcg/cpu-exec.c      | 45 +++++++++++++++++++++++----------------------
>   accel/tcg/translate-all.c | 13 +++++++++----
>   exec.c                    |  2 +-
>   tcg/tcg-runtime.c         |  2 +-
>   tests/qht-bench.c         |  2 +-
>   10 files changed, 65 insertions(+), 39 deletions(-)

Reviewed-by: Richard Henderson <rth@twiddle.net>


r~
Pranith Kumar Aug. 27, 2017, 10:15 p.m. UTC | #2
Hi Emilio,

On Fri, Jul 21, 2017 at 1:59 AM, Emilio G. Cota <cota@braap.org> wrote:
> This will enable us to decouple code translation from the value
> of parallel_cpus at any given time. It will also help us minimize
> TB flushes when generating code via EXCP_ATOMIC.
>
> Note that the declaration of parallel_cpus is brought to exec-all.h
> to be able to define there the "curr_cflags" inline.
>
> Signed-off-by: Emilio G. Cota <cota@braap.org>

I was testing a winxp image today and I bisected a infinite loop to
this commit. The loop happens both with and without mttcg, so I think
it has got to do with something else.

It seems to be executing this instruction:

Trace 0x7fffc1d036c0 [0: 00000000000c9a6b]
----------------
IN:
0x00000000000c9a6b:  rep movsb %cs:(%si),%es:(%di)

and never stops.

You can find an execution log here: http://pranith.org/files/log_n.txt.gz

Let me know if you need more information.

Thanks,

> ---
>  include/exec/exec-all.h   | 20 +++++++++++++++++++-
>  include/exec/tb-hash-xx.h |  9 ++++++---
>  include/exec/tb-hash.h    |  4 ++--
>  include/exec/tb-lookup.h  |  6 +++---
>  tcg/tcg.h                 |  1 -
>  accel/tcg/cpu-exec.c      | 45 +++++++++++++++++++++++----------------------
>  accel/tcg/translate-all.c | 13 +++++++++----
>  exec.c                    |  2 +-
>  tcg/tcg-runtime.c         |  2 +-
>  tests/qht-bench.c         |  2 +-
>  10 files changed, 65 insertions(+), 39 deletions(-)
>
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index 8cbd90b..f6a928d 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -353,6 +353,9 @@ struct TranslationBlock {
>  #define CF_USE_ICOUNT  0x20000
>  #define CF_IGNORE_ICOUNT 0x40000 /* Do not generate icount code */
>  #define CF_INVALID     0x80000 /* TB is stale. Setters must acquire tb_lock */
> +#define CF_PARALLEL    0x100000 /* Generate code for a parallel context */
> +/* cflags' mask for hashing/comparison */
> +#define CF_HASH_MASK (CF_COUNT_MASK | CF_PARALLEL)
>
>      /* Per-vCPU dynamic tracing state used to generate this TB */
>      uint32_t trace_vcpu_dstate;
> @@ -396,11 +399,26 @@ struct TranslationBlock {
>      uintptr_t jmp_list_first;
>  };
>
> +extern bool parallel_cpus;
> +
> +/* Hide the atomic_read to make code a little easier on the eyes */
> +static inline uint32_t tb_cflags(const TranslationBlock *tb)
> +{
> +    return atomic_read(&tb->cflags);
> +}
> +
> +/* current cflags for hashing/comparison */
> +static inline uint32_t curr_cflags(void)
> +{
> +    return parallel_cpus ? CF_PARALLEL : 0;
> +}
> +
>  void tb_free(TranslationBlock *tb);
>  void tb_flush(CPUState *cpu);
>  void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
>  TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
> -                                   target_ulong cs_base, uint32_t flags);
> +                                   target_ulong cs_base, uint32_t flags,
> +                                   uint32_t cf_mask);
>
>  #if defined(USE_DIRECT_JUMP)
>
> diff --git a/include/exec/tb-hash-xx.h b/include/exec/tb-hash-xx.h
> index 6cd3022..747a9a6 100644
> --- a/include/exec/tb-hash-xx.h
> +++ b/include/exec/tb-hash-xx.h
> @@ -48,8 +48,8 @@
>   * xxhash32, customized for input variables that are not guaranteed to be
>   * contiguous in memory.
>   */
> -static inline
> -uint32_t tb_hash_func6(uint64_t a0, uint64_t b0, uint32_t e, uint32_t f)
> +static inline uint32_t
> +tb_hash_func7(uint64_t a0, uint64_t b0, uint32_t e, uint32_t f, uint32_t g)
>  {
>      uint32_t v1 = TB_HASH_XX_SEED + PRIME32_1 + PRIME32_2;
>      uint32_t v2 = TB_HASH_XX_SEED + PRIME32_2;
> @@ -78,7 +78,7 @@ uint32_t tb_hash_func6(uint64_t a0, uint64_t b0, uint32_t e, uint32_t f)
>      v4 *= PRIME32_1;
>
>      h32 = rol32(v1, 1) + rol32(v2, 7) + rol32(v3, 12) + rol32(v4, 18);
> -    h32 += 24;
> +    h32 += 28;
>
>      h32 += e * PRIME32_3;
>      h32  = rol32(h32, 17) * PRIME32_4;
> @@ -86,6 +86,9 @@ uint32_t tb_hash_func6(uint64_t a0, uint64_t b0, uint32_t e, uint32_t f)
>      h32 += f * PRIME32_3;
>      h32  = rol32(h32, 17) * PRIME32_4;
>
> +    h32 += g * PRIME32_3;
> +    h32  = rol32(h32, 17) * PRIME32_4;
> +
>      h32 ^= h32 >> 15;
>      h32 *= PRIME32_2;
>      h32 ^= h32 >> 13;
> diff --git a/include/exec/tb-hash.h b/include/exec/tb-hash.h
> index 17b5ee0..0526c4f 100644
> --- a/include/exec/tb-hash.h
> +++ b/include/exec/tb-hash.h
> @@ -59,9 +59,9 @@ static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc)
>
>  static inline
>  uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags,
> -                      uint32_t trace_vcpu_dstate)
> +                      uint32_t cf_mask, uint32_t trace_vcpu_dstate)
>  {
> -    return tb_hash_func6(phys_pc, pc, flags, trace_vcpu_dstate);
> +    return tb_hash_func7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate);
>  }
>
>  #endif
> diff --git a/include/exec/tb-lookup.h b/include/exec/tb-lookup.h
> index 436b6d5..2961385 100644
> --- a/include/exec/tb-lookup.h
> +++ b/include/exec/tb-lookup.h
> @@ -21,7 +21,7 @@
>  /* Might cause an exception, so have a longjmp destination ready */
>  static inline TranslationBlock *
>  tb_lookup__cpu_state(CPUState *cpu, target_ulong *pc, target_ulong *cs_base,
> -                     uint32_t *flags)
> +                     uint32_t *flags, uint32_t cf_mask)
>  {
>      CPUArchState *env = (CPUArchState *)cpu->env_ptr;
>      TranslationBlock *tb;
> @@ -35,10 +35,10 @@ tb_lookup__cpu_state(CPUState *cpu, target_ulong *pc, target_ulong *cs_base,
>                 tb->cs_base == *cs_base &&
>                 tb->flags == *flags &&
>                 tb->trace_vcpu_dstate == *cpu->trace_dstate &&
> -               !(atomic_read(&tb->cflags) & CF_INVALID))) {
> +               (tb_cflags(tb) & (CF_HASH_MASK | CF_INVALID)) == cf_mask)) {
>          return tb;
>      }
> -    tb = tb_htable_lookup(cpu, *pc, *cs_base, *flags);
> +    tb = tb_htable_lookup(cpu, *pc, *cs_base, *flags, cf_mask);
>      if (tb == NULL) {
>          return NULL;
>      }
> diff --git a/tcg/tcg.h b/tcg/tcg.h
> index da78721..96872f8 100644
> --- a/tcg/tcg.h
> +++ b/tcg/tcg.h
> @@ -730,7 +730,6 @@ struct TCGContext {
>  };
>
>  extern TCGContext tcg_ctx;
> -extern bool parallel_cpus;
>
>  static inline void tcg_set_insn_param(int op_idx, int arg, TCGArg v)
>  {
> diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
> index fae8c40..b71e015 100644
> --- a/accel/tcg/cpu-exec.c
> +++ b/accel/tcg/cpu-exec.c
> @@ -207,7 +207,8 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
>      tb_lock();
>      tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags,
>                       max_cycles | CF_NOCACHE
> -                         | (ignore_icount ? CF_IGNORE_ICOUNT : 0));
> +                         | (ignore_icount ? CF_IGNORE_ICOUNT : 0)
> +                         | curr_cflags());
>      tb->orig_tb = orig_tb;
>      tb_unlock();
>
> @@ -225,31 +226,27 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
>  static void cpu_exec_step(CPUState *cpu)
>  {
>      CPUClass *cc = CPU_GET_CLASS(cpu);
> -    CPUArchState *env = (CPUArchState *)cpu->env_ptr;
>      TranslationBlock *tb;
>      target_ulong cs_base, pc;
>      uint32_t flags;
> +    uint32_t cflags = 1 | CF_IGNORE_ICOUNT;
>
> -    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
>      if (sigsetjmp(cpu->jmp_env, 0) == 0) {
> -        mmap_lock();
> -        tb_lock();
> -        tb = tb_gen_code(cpu, pc, cs_base, flags,
> -                         1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
> -        tb->orig_tb = NULL;
> -        tb_unlock();
> -        mmap_unlock();
> +        tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags,
> +                                  cflags & CF_HASH_MASK);
> +        if (tb == NULL) {
> +            mmap_lock();
> +            tb_lock();
> +            tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
> +            tb_unlock();
> +            mmap_unlock();
> +        }
>
>          cc->cpu_exec_enter(cpu);
>          /* execute the generated code */
> -        trace_exec_tb_nocache(tb, pc);
> +        trace_exec_tb(tb, pc);
>          cpu_tb_exec(cpu, tb);
>          cc->cpu_exec_exit(cpu);
> -
> -        tb_lock();
> -        tb_phys_invalidate(tb, -1);
> -        tb_free(tb);
> -        tb_unlock();
>      } else {
>          /* We may have exited due to another problem here, so we need
>           * to reset any tb_locks we may have taken but didn't release.
> @@ -281,6 +278,7 @@ struct tb_desc {
>      CPUArchState *env;
>      tb_page_addr_t phys_page1;
>      uint32_t flags;
> +    uint32_t cf_mask;
>      uint32_t trace_vcpu_dstate;
>  };
>
> @@ -294,7 +292,7 @@ static bool tb_cmp(const void *p, const void *d)
>          tb->cs_base == desc->cs_base &&
>          tb->flags == desc->flags &&
>          tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
> -        !(atomic_read(&tb->cflags) & CF_INVALID)) {
> +        (tb_cflags(tb) & (CF_HASH_MASK | CF_INVALID)) == desc->cf_mask) {
>          /* check next page if needed */
>          if (tb->page_addr[1] == -1) {
>              return true;
> @@ -313,7 +311,8 @@ static bool tb_cmp(const void *p, const void *d)
>  }
>
>  TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
> -                                   target_ulong cs_base, uint32_t flags)
> +                                   target_ulong cs_base, uint32_t flags,
> +                                   uint32_t cf_mask)
>  {
>      tb_page_addr_t phys_pc;
>      struct tb_desc desc;
> @@ -322,11 +321,12 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
>      desc.env = (CPUArchState *)cpu->env_ptr;
>      desc.cs_base = cs_base;
>      desc.flags = flags;
> +    desc.cf_mask = cf_mask;
>      desc.trace_vcpu_dstate = *cpu->trace_dstate;
>      desc.pc = pc;
>      phys_pc = get_page_addr_code(desc.env, pc);
>      desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
> -    h = tb_hash_func(phys_pc, pc, flags, *cpu->trace_dstate);
> +    h = tb_hash_func(phys_pc, pc, flags, cf_mask, *cpu->trace_dstate);
>      return qht_lookup(&tcg_ctx.tb_ctx.htable, tb_cmp, &desc, h);
>  }
>
> @@ -338,8 +338,9 @@ static inline TranslationBlock *tb_find(CPUState *cpu,
>      target_ulong cs_base, pc;
>      uint32_t flags;
>      bool acquired_tb_lock = false;
> +    uint32_t cf_mask = curr_cflags();
>
> -    tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags);
> +    tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask);
>      if (tb == NULL) {
>          /* mmap_lock is needed by tb_gen_code, and mmap_lock must be
>           * taken outside tb_lock. As system emulation is currently
> @@ -352,10 +353,10 @@ static inline TranslationBlock *tb_find(CPUState *cpu,
>          /* There's a chance that our desired tb has been translated while
>           * taking the locks so we check again inside the lock.
>           */
> -        tb = tb_htable_lookup(cpu, pc, cs_base, flags);
> +        tb = tb_htable_lookup(cpu, pc, cs_base, flags, cf_mask);
>          if (likely(tb == NULL)) {
>              /* if no translated code available, then translate it now */
> -            tb = tb_gen_code(cpu, pc, cs_base, flags, 0);
> +            tb = tb_gen_code(cpu, pc, cs_base, flags, cf_mask);
>          }
>
>          mmap_unlock();
> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
> index c8abef0..c1ce38f 100644
> --- a/accel/tcg/translate-all.c
> +++ b/accel/tcg/translate-all.c
> @@ -1077,7 +1077,8 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
>
>      /* remove the TB from the hash list */
>      phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
> -    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->trace_vcpu_dstate);
> +    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK,
> +                     tb->trace_vcpu_dstate);
>      qht_remove(&tcg_ctx.tb_ctx.htable, tb, h);
>
>      /* remove the TB from the page list */
> @@ -1222,7 +1223,8 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
>      }
>
>      /* add in the hash table */
> -    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->trace_vcpu_dstate);
> +    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK,
> +                     tb->trace_vcpu_dstate);
>      qht_insert(&tcg_ctx.tb_ctx.htable, tb, h);
>
>  #ifdef DEBUG_TB_CHECK
> @@ -1503,7 +1505,8 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
>          /* we generate a block containing just the instruction
>             modifying the memory. It will ensure that it cannot modify
>             itself */
> -        tb_gen_code(cpu, current_pc, current_cs_base, current_flags, 1);
> +        tb_gen_code(cpu, current_pc, current_cs_base, current_flags,
> +                    1 | curr_cflags());
>          cpu_loop_exit_noexc(cpu);
>      }
>  #endif
> @@ -1621,7 +1624,8 @@ static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc)
>          /* we generate a block containing just the instruction
>             modifying the memory. It will ensure that it cannot modify
>             itself */
> -        tb_gen_code(cpu, current_pc, current_cs_base, current_flags, 1);
> +        tb_gen_code(cpu, current_pc, current_cs_base, current_flags,
> +                    1 | curr_cflags());
>          /* tb_lock will be reset after cpu_loop_exit_noexc longjmps
>           * back into the cpu_exec loop. */
>          return true;
> @@ -1765,6 +1769,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
>      }
>
>      cflags = n | CF_LAST_IO;
> +    cflags |= curr_cflags();
>      pc = tb->pc;
>      cs_base = tb->cs_base;
>      flags = tb->flags;
> diff --git a/exec.c b/exec.c
> index 01ac21e..94b0f3e 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -2433,7 +2433,7 @@ static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
>                      cpu_loop_exit(cpu);
>                  } else {
>                      cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
> -                    tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
> +                    tb_gen_code(cpu, pc, cs_base, cpu_flags, 1 | curr_cflags());
>                      cpu_loop_exit_noexc(cpu);
>                  }
>              }
> diff --git a/tcg/tcg-runtime.c b/tcg/tcg-runtime.c
> index 7100339..4f873a9 100644
> --- a/tcg/tcg-runtime.c
> +++ b/tcg/tcg-runtime.c
> @@ -151,7 +151,7 @@ void *HELPER(lookup_tb_ptr)(CPUArchState *env)
>      target_ulong cs_base, pc;
>      uint32_t flags;
>
> -    tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags);
> +    tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, curr_cflags());
>      if (tb == NULL) {
>          return tcg_ctx.code_gen_epilogue;
>      }
> diff --git a/tests/qht-bench.c b/tests/qht-bench.c
> index 11c1cec..4cabdfd 100644
> --- a/tests/qht-bench.c
> +++ b/tests/qht-bench.c
> @@ -103,7 +103,7 @@ static bool is_equal(const void *obj, const void *userp)
>
>  static inline uint32_t h(unsigned long v)
>  {
> -    return tb_hash_func6(v, 0, 0, 0);
> +    return tb_hash_func7(v, 0, 0, 0, 0);
>  }
>
>  /*
> --
> 2.7.4
>
>
Emilio Cota Aug. 29, 2017, 9:16 p.m. UTC | #3
On Sun, Aug 27, 2017 at 18:15:50 -0400, Pranith Kumar wrote:
> Hi Emilio,
> 
> On Fri, Jul 21, 2017 at 1:59 AM, Emilio G. Cota <cota@braap.org> wrote:
> > This will enable us to decouple code translation from the value
> > of parallel_cpus at any given time. It will also help us minimize
> > TB flushes when generating code via EXCP_ATOMIC.
> >
> > Note that the declaration of parallel_cpus is brought to exec-all.h
> > to be able to define there the "curr_cflags" inline.
> >
> > Signed-off-by: Emilio G. Cota <cota@braap.org>
> 
> I was testing a winxp image today and I bisected a infinite loop to
> this commit. The loop happens both with and without mttcg, so I think
> it has got to do with something else.

Can you test the below? It lets me boot ubuntu, otherwise it reliably
chokes on a 'rep movsb' *very* early (doesn't even get to grub).

This discusson on v2 might be relevant (I added CF_COUNT_MASK as a
result of it, but it seems I have to revisit that):
  https://lists.gnu.org/archive/html/qemu-devel/2017-07/msg06456.html

Anyway let me know if this fixes it for you. Thanks for testing!

		Emilio

diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 025fae0..8b2f233 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -326,7 +326,7 @@ struct TranslationBlock {
 #define CF_INVALID     0x80000 /* TB is stale. Setters must acquire tb_lock */
 #define CF_PARALLEL    0x100000 /* Generate code for a parallel context */
 /* cflags' mask for hashing/comparison */
-#define CF_HASH_MASK (CF_COUNT_MASK | CF_PARALLEL)
+#define CF_HASH_MASK (CF_PARALLEL)

     /* Per-vCPU dynamic tracing state used to generate this TB */
     uint32_t trace_vcpu_dstate;
Pranith Kumar Aug. 30, 2017, 2:43 p.m. UTC | #4
On Tue, Aug 29, 2017 at 5:16 PM, Emilio G. Cota <cota@braap.org> wrote:
> On Sun, Aug 27, 2017 at 18:15:50 -0400, Pranith Kumar wrote:
>> Hi Emilio,
>>
>> On Fri, Jul 21, 2017 at 1:59 AM, Emilio G. Cota <cota@braap.org> wrote:
>> > This will enable us to decouple code translation from the value
>> > of parallel_cpus at any given time. It will also help us minimize
>> > TB flushes when generating code via EXCP_ATOMIC.
>> >
>> > Note that the declaration of parallel_cpus is brought to exec-all.h
>> > to be able to define there the "curr_cflags" inline.
>> >
>> > Signed-off-by: Emilio G. Cota <cota@braap.org>
>>
>> I was testing a winxp image today and I bisected a infinite loop to
>> this commit. The loop happens both with and without mttcg, so I think
>> it has got to do with something else.
>
> Can you test the below? It lets me boot ubuntu, otherwise it reliably
> chokes on a 'rep movsb' *very* early (doesn't even get to grub).
>
> This discusson on v2 might be relevant (I added CF_COUNT_MASK as a
> result of it, but it seems I have to revisit that):
>   https://lists.gnu.org/archive/html/qemu-devel/2017-07/msg06456.html
>
> Anyway let me know if this fixes it for you. Thanks for testing!
>

Yes, this fixes the issue for me.

Thanks,
--
Pranith
Emilio Cota Sept. 22, 2017, 8:40 p.m. UTC | #5
Hi Richard,

Are you planning to get this patchset merged in this window? If so, I can
give it a respin on top of the current master.

Anyway, before doing so we should fix the issue around CF_COUNT_MASK that
Pranith reported:

On Wed, Aug 30, 2017 at 10:43:28 -0400, Pranith Kumar wrote:
> On Tue, Aug 29, 2017 at 5:16 PM, Emilio G. Cota <cota@braap.org> wrote:
> > On Sun, Aug 27, 2017 at 18:15:50 -0400, Pranith Kumar wrote:
> >> Hi Emilio,
> >>
> >> On Fri, Jul 21, 2017 at 1:59 AM, Emilio G. Cota <cota@braap.org> wrote:
> >> > This will enable us to decouple code translation from the value
> >> > of parallel_cpus at any given time. It will also help us minimize
> >> > TB flushes when generating code via EXCP_ATOMIC.
> >> >
> >> > Note that the declaration of parallel_cpus is brought to exec-all.h
> >> > to be able to define there the "curr_cflags" inline.
> >> >
> >> > Signed-off-by: Emilio G. Cota <cota@braap.org>
> >>
> >> I was testing a winxp image today and I bisected a infinite loop to
> >> this commit. The loop happens both with and without mttcg, so I think
> >> it has got to do with something else.
> >
> > Can you test the below? It lets me boot ubuntu, otherwise it reliably
> > chokes on a 'rep movsb' *very* early (doesn't even get to grub).
> >
> > This discusson on v2 might be relevant (I added CF_COUNT_MASK as a
> > result of it, but it seems I have to revisit that):
> >   https://lists.gnu.org/archive/html/qemu-devel/2017-07/msg06456.html
> >
> > Anyway let me know if this fixes it for you. Thanks for testing!
> >
> 
> Yes, this fixes the issue for me.

My quick-fix is just not to check those bits, but as you pointed out during
v3's review the whole thing is a little bit fragile if we don't check them.

Should we just go with the CF_NOCHAIN flag that you proposed? If so, do you
want me to give that approach a go, or you prefer to do it yourself?

Thanks,

		Emilio
Richard Henderson Sept. 25, 2017, 5:01 p.m. UTC | #6
On 09/22/2017 01:40 PM, Emilio G. Cota wrote:
> Hi Richard,
> 
> Are you planning to get this patchset merged in this window? If so, I can
> give it a respin on top of the current master.

Yes, I do.  I've been intending to look at ...

> Anyway, before doing so we should fix the issue around CF_COUNT_MASK that
> Pranith reported:

... this one and figure out why my intuition is wrong.
I'm at Linaro Connect this week, so it may have to wait til next.


r~
Emilio Cota Oct. 5, 2017, 11:24 p.m. UTC | #7
On Mon, Sep 25, 2017 at 10:01:15 -0700, Richard Henderson wrote:
> On 09/22/2017 01:40 PM, Emilio G. Cota wrote:
> > Hi Richard,
> > 
> > Are you planning to get this patchset merged in this window? If so, I can
> > give it a respin on top of the current master.
> 
> Yes, I do.  I've been intending to look at ...
> 
> > Anyway, before doing so we should fix the issue around CF_COUNT_MASK that
> > Pranith reported:
> 
> ... this one and figure out why my intuition is wrong.
> I'm at Linaro Connect this week, so it may have to wait til next.

I just tested this with icount. Turns out two fixups to this patchset are
necessary to not break icount. The first one is (again) to just
include CF_PARALLEL in the hash mask. The second is a fix for the comparison
function of tb_tc, without which removals don't work.

I'm including the fixups below.

Thanks,

		Emilio

commit 7a899a8df2d769dd80ba1a7f559cb4ddbb0c568b
Author: Emilio G. Cota <cota@braap.org>
Date:   Thu Oct 5 18:40:30 2017 -0400

    FIXUP for "tcg: define CF_PARALLEL ..."
    
    Signed-off-by: Emilio G. Cota <cota@braap.org>

diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 025fae0..8b2f233 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -326,7 +326,7 @@ struct TranslationBlock {
 #define CF_INVALID     0x80000 /* TB is stale. Setters must acquire tb_lock */
 #define CF_PARALLEL    0x100000 /* Generate code for a parallel context */
 /* cflags' mask for hashing/comparison */
-#define CF_HASH_MASK (CF_COUNT_MASK | CF_PARALLEL)
+#define CF_HASH_MASK (CF_PARALLEL)
 
     /* Per-vCPU dynamic tracing state used to generate this TB */
     uint32_t trace_vcpu_dstate;

commit c102c2409a5a134fd7f9ba61f69a5ae29df99c89
Author: Emilio G. Cota <cota@braap.org>
Date:   Thu Oct 5 18:51:24 2017 -0400

    FIXUP for "translate-all: use a binary search tree to ..."
    
    Signed-off-by: Emilio G. Cota <cota@braap.org>

diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 9f1faff..fe607ca 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -793,19 +793,19 @@ static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
     const struct tb_tc *b = bp;
 
     /*
-     * When both sizes are set, we know this isn't a lookup and therefore
-     * the two buffers are non-overlapping.
+     * When both sizes are set, we know this isn't a lookup.
      * This is the most likely case: every TB must be inserted; lookups
      * are a lot less frequent.
      */
     if (likely(a->size && b->size)) {
-        /* a->ptr == b->ptr would mean the buffers overlap */
-        g_assert(a->ptr != b->ptr);
-
         if (a->ptr > b->ptr) {
             return 1;
+        } else if (a->ptr < b->ptr) {
+            return -1;
         }
-        return -1;
+        /* a->ptr == b->ptr should happen only on deletions */
+        g_assert(a->size == b->size);
+        return 0;
     }
     /*
      * All lookups have either .size field set to 0.
Emilio Cota Oct. 9, 2017, 7:24 p.m. UTC | #8
On Thu, Oct 05, 2017 at 19:24:16 -0400, Emilio G. Cota wrote:
> I'm including the fixups below.

Just pushed v5 of this series. I rebased the series on top of the current master,
and added the already mentioned fixes plus a new one (see below).

Grab the patches from:
  https://github.com/cota/qemu/tree/multi-tcg-v5

Changes since v4:
- Rebase to current master. (fixed a few conflicts, mostly due to some targets
  having been converted to the generic translation loop.)
- Add the removal of CF_COUNT_MASK as a separate "FIXUP" commit to signal
  that this still requires attention.
- Fix tb TC comparison function to support deletions (otherwise -icount breaks)
- [new fix] TCG regions: Rely on max_cpus instead of smp_cpus. Otherwise we'd
  break CPU hotplug on targets that support it. (max_cpus and smp_cpus are
  set from the command line with -smp foo,maxcpus=bar; max_cpus is always
  >= smp_cpus.)

Thanks,

		Emilio
Richard Henderson Oct. 10, 2017, 1:23 a.m. UTC | #9
On 10/09/2017 12:24 PM, Emilio G. Cota wrote:
> On Thu, Oct 05, 2017 at 19:24:16 -0400, Emilio G. Cota wrote:
>> I'm including the fixups below.
> 
> Just pushed v5 of this series. I rebased the series on top of the current master,
> and added the already mentioned fixes plus a new one (see below).
> 
> Grab the patches from:
>   https://github.com/cota/qemu/tree/multi-tcg-v5

Thanks.  I was just about to do this rebase myself.

> Changes since v4:
> - Rebase to current master. (fixed a few conflicts, mostly due to some targets
>   having been converted to the generic translation loop.)
> - Add the removal of CF_COUNT_MASK as a separate "FIXUP" commit to signal
>   that this still requires attention.
> - Fix tb TC comparison function to support deletions (otherwise -icount breaks)
> - [new fix] TCG regions: Rely on max_cpus instead of smp_cpus. Otherwise we'd
>   break CPU hotplug on targets that support it. (max_cpus and smp_cpus are
>   set from the command line with -smp foo,maxcpus=bar; max_cpus is always
>   >= smp_cpus.)

Excellent.  I'll work on reviewing these changes this week.
In the meantime, I've cherry-picked about half of the patch set.


r~
diff mbox

Patch

diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 8cbd90b..f6a928d 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -353,6 +353,9 @@  struct TranslationBlock {
 #define CF_USE_ICOUNT  0x20000
 #define CF_IGNORE_ICOUNT 0x40000 /* Do not generate icount code */
 #define CF_INVALID     0x80000 /* TB is stale. Setters must acquire tb_lock */
+#define CF_PARALLEL    0x100000 /* Generate code for a parallel context */
+/* cflags' mask for hashing/comparison */
+#define CF_HASH_MASK (CF_COUNT_MASK | CF_PARALLEL)
 
     /* Per-vCPU dynamic tracing state used to generate this TB */
     uint32_t trace_vcpu_dstate;
@@ -396,11 +399,26 @@  struct TranslationBlock {
     uintptr_t jmp_list_first;
 };
 
+extern bool parallel_cpus;
+
+/* Hide the atomic_read to make code a little easier on the eyes */
+static inline uint32_t tb_cflags(const TranslationBlock *tb)
+{
+    return atomic_read(&tb->cflags);
+}
+
+/* current cflags for hashing/comparison */
+static inline uint32_t curr_cflags(void)
+{
+    return parallel_cpus ? CF_PARALLEL : 0;
+}
+
 void tb_free(TranslationBlock *tb);
 void tb_flush(CPUState *cpu);
 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
 TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
-                                   target_ulong cs_base, uint32_t flags);
+                                   target_ulong cs_base, uint32_t flags,
+                                   uint32_t cf_mask);
 
 #if defined(USE_DIRECT_JUMP)
 
diff --git a/include/exec/tb-hash-xx.h b/include/exec/tb-hash-xx.h
index 6cd3022..747a9a6 100644
--- a/include/exec/tb-hash-xx.h
+++ b/include/exec/tb-hash-xx.h
@@ -48,8 +48,8 @@ 
  * xxhash32, customized for input variables that are not guaranteed to be
  * contiguous in memory.
  */
-static inline
-uint32_t tb_hash_func6(uint64_t a0, uint64_t b0, uint32_t e, uint32_t f)
+static inline uint32_t
+tb_hash_func7(uint64_t a0, uint64_t b0, uint32_t e, uint32_t f, uint32_t g)
 {
     uint32_t v1 = TB_HASH_XX_SEED + PRIME32_1 + PRIME32_2;
     uint32_t v2 = TB_HASH_XX_SEED + PRIME32_2;
@@ -78,7 +78,7 @@  uint32_t tb_hash_func6(uint64_t a0, uint64_t b0, uint32_t e, uint32_t f)
     v4 *= PRIME32_1;
 
     h32 = rol32(v1, 1) + rol32(v2, 7) + rol32(v3, 12) + rol32(v4, 18);
-    h32 += 24;
+    h32 += 28;
 
     h32 += e * PRIME32_3;
     h32  = rol32(h32, 17) * PRIME32_4;
@@ -86,6 +86,9 @@  uint32_t tb_hash_func6(uint64_t a0, uint64_t b0, uint32_t e, uint32_t f)
     h32 += f * PRIME32_3;
     h32  = rol32(h32, 17) * PRIME32_4;
 
+    h32 += g * PRIME32_3;
+    h32  = rol32(h32, 17) * PRIME32_4;
+
     h32 ^= h32 >> 15;
     h32 *= PRIME32_2;
     h32 ^= h32 >> 13;
diff --git a/include/exec/tb-hash.h b/include/exec/tb-hash.h
index 17b5ee0..0526c4f 100644
--- a/include/exec/tb-hash.h
+++ b/include/exec/tb-hash.h
@@ -59,9 +59,9 @@  static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc)
 
 static inline
 uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags,
-                      uint32_t trace_vcpu_dstate)
+                      uint32_t cf_mask, uint32_t trace_vcpu_dstate)
 {
-    return tb_hash_func6(phys_pc, pc, flags, trace_vcpu_dstate);
+    return tb_hash_func7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate);
 }
 
 #endif
diff --git a/include/exec/tb-lookup.h b/include/exec/tb-lookup.h
index 436b6d5..2961385 100644
--- a/include/exec/tb-lookup.h
+++ b/include/exec/tb-lookup.h
@@ -21,7 +21,7 @@ 
 /* Might cause an exception, so have a longjmp destination ready */
 static inline TranslationBlock *
 tb_lookup__cpu_state(CPUState *cpu, target_ulong *pc, target_ulong *cs_base,
-                     uint32_t *flags)
+                     uint32_t *flags, uint32_t cf_mask)
 {
     CPUArchState *env = (CPUArchState *)cpu->env_ptr;
     TranslationBlock *tb;
@@ -35,10 +35,10 @@  tb_lookup__cpu_state(CPUState *cpu, target_ulong *pc, target_ulong *cs_base,
                tb->cs_base == *cs_base &&
                tb->flags == *flags &&
                tb->trace_vcpu_dstate == *cpu->trace_dstate &&
-               !(atomic_read(&tb->cflags) & CF_INVALID))) {
+               (tb_cflags(tb) & (CF_HASH_MASK | CF_INVALID)) == cf_mask)) {
         return tb;
     }
-    tb = tb_htable_lookup(cpu, *pc, *cs_base, *flags);
+    tb = tb_htable_lookup(cpu, *pc, *cs_base, *flags, cf_mask);
     if (tb == NULL) {
         return NULL;
     }
diff --git a/tcg/tcg.h b/tcg/tcg.h
index da78721..96872f8 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -730,7 +730,6 @@  struct TCGContext {
 };
 
 extern TCGContext tcg_ctx;
-extern bool parallel_cpus;
 
 static inline void tcg_set_insn_param(int op_idx, int arg, TCGArg v)
 {
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index fae8c40..b71e015 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -207,7 +207,8 @@  static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
     tb_lock();
     tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags,
                      max_cycles | CF_NOCACHE
-                         | (ignore_icount ? CF_IGNORE_ICOUNT : 0));
+                         | (ignore_icount ? CF_IGNORE_ICOUNT : 0)
+                         | curr_cflags());
     tb->orig_tb = orig_tb;
     tb_unlock();
 
@@ -225,31 +226,27 @@  static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
 static void cpu_exec_step(CPUState *cpu)
 {
     CPUClass *cc = CPU_GET_CLASS(cpu);
-    CPUArchState *env = (CPUArchState *)cpu->env_ptr;
     TranslationBlock *tb;
     target_ulong cs_base, pc;
     uint32_t flags;
+    uint32_t cflags = 1 | CF_IGNORE_ICOUNT;
 
-    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
     if (sigsetjmp(cpu->jmp_env, 0) == 0) {
-        mmap_lock();
-        tb_lock();
-        tb = tb_gen_code(cpu, pc, cs_base, flags,
-                         1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
-        tb->orig_tb = NULL;
-        tb_unlock();
-        mmap_unlock();
+        tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags,
+                                  cflags & CF_HASH_MASK);
+        if (tb == NULL) {
+            mmap_lock();
+            tb_lock();
+            tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
+            tb_unlock();
+            mmap_unlock();
+        }
 
         cc->cpu_exec_enter(cpu);
         /* execute the generated code */
-        trace_exec_tb_nocache(tb, pc);
+        trace_exec_tb(tb, pc);
         cpu_tb_exec(cpu, tb);
         cc->cpu_exec_exit(cpu);
-
-        tb_lock();
-        tb_phys_invalidate(tb, -1);
-        tb_free(tb);
-        tb_unlock();
     } else {
         /* We may have exited due to another problem here, so we need
          * to reset any tb_locks we may have taken but didn't release.
@@ -281,6 +278,7 @@  struct tb_desc {
     CPUArchState *env;
     tb_page_addr_t phys_page1;
     uint32_t flags;
+    uint32_t cf_mask;
     uint32_t trace_vcpu_dstate;
 };
 
@@ -294,7 +292,7 @@  static bool tb_cmp(const void *p, const void *d)
         tb->cs_base == desc->cs_base &&
         tb->flags == desc->flags &&
         tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
-        !(atomic_read(&tb->cflags) & CF_INVALID)) {
+        (tb_cflags(tb) & (CF_HASH_MASK | CF_INVALID)) == desc->cf_mask) {
         /* check next page if needed */
         if (tb->page_addr[1] == -1) {
             return true;
@@ -313,7 +311,8 @@  static bool tb_cmp(const void *p, const void *d)
 }
 
 TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
-                                   target_ulong cs_base, uint32_t flags)
+                                   target_ulong cs_base, uint32_t flags,
+                                   uint32_t cf_mask)
 {
     tb_page_addr_t phys_pc;
     struct tb_desc desc;
@@ -322,11 +321,12 @@  TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
     desc.env = (CPUArchState *)cpu->env_ptr;
     desc.cs_base = cs_base;
     desc.flags = flags;
+    desc.cf_mask = cf_mask;
     desc.trace_vcpu_dstate = *cpu->trace_dstate;
     desc.pc = pc;
     phys_pc = get_page_addr_code(desc.env, pc);
     desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
-    h = tb_hash_func(phys_pc, pc, flags, *cpu->trace_dstate);
+    h = tb_hash_func(phys_pc, pc, flags, cf_mask, *cpu->trace_dstate);
     return qht_lookup(&tcg_ctx.tb_ctx.htable, tb_cmp, &desc, h);
 }
 
@@ -338,8 +338,9 @@  static inline TranslationBlock *tb_find(CPUState *cpu,
     target_ulong cs_base, pc;
     uint32_t flags;
     bool acquired_tb_lock = false;
+    uint32_t cf_mask = curr_cflags();
 
-    tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags);
+    tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask);
     if (tb == NULL) {
         /* mmap_lock is needed by tb_gen_code, and mmap_lock must be
          * taken outside tb_lock. As system emulation is currently
@@ -352,10 +353,10 @@  static inline TranslationBlock *tb_find(CPUState *cpu,
         /* There's a chance that our desired tb has been translated while
          * taking the locks so we check again inside the lock.
          */
-        tb = tb_htable_lookup(cpu, pc, cs_base, flags);
+        tb = tb_htable_lookup(cpu, pc, cs_base, flags, cf_mask);
         if (likely(tb == NULL)) {
             /* if no translated code available, then translate it now */
-            tb = tb_gen_code(cpu, pc, cs_base, flags, 0);
+            tb = tb_gen_code(cpu, pc, cs_base, flags, cf_mask);
         }
 
         mmap_unlock();
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index c8abef0..c1ce38f 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -1077,7 +1077,8 @@  void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
 
     /* remove the TB from the hash list */
     phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
-    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->trace_vcpu_dstate);
+    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK,
+                     tb->trace_vcpu_dstate);
     qht_remove(&tcg_ctx.tb_ctx.htable, tb, h);
 
     /* remove the TB from the page list */
@@ -1222,7 +1223,8 @@  static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
     }
 
     /* add in the hash table */
-    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->trace_vcpu_dstate);
+    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK,
+                     tb->trace_vcpu_dstate);
     qht_insert(&tcg_ctx.tb_ctx.htable, tb, h);
 
 #ifdef DEBUG_TB_CHECK
@@ -1503,7 +1505,8 @@  void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
         /* we generate a block containing just the instruction
            modifying the memory. It will ensure that it cannot modify
            itself */
-        tb_gen_code(cpu, current_pc, current_cs_base, current_flags, 1);
+        tb_gen_code(cpu, current_pc, current_cs_base, current_flags,
+                    1 | curr_cflags());
         cpu_loop_exit_noexc(cpu);
     }
 #endif
@@ -1621,7 +1624,8 @@  static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc)
         /* we generate a block containing just the instruction
            modifying the memory. It will ensure that it cannot modify
            itself */
-        tb_gen_code(cpu, current_pc, current_cs_base, current_flags, 1);
+        tb_gen_code(cpu, current_pc, current_cs_base, current_flags,
+                    1 | curr_cflags());
         /* tb_lock will be reset after cpu_loop_exit_noexc longjmps
          * back into the cpu_exec loop. */
         return true;
@@ -1765,6 +1769,7 @@  void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
     }
 
     cflags = n | CF_LAST_IO;
+    cflags |= curr_cflags();
     pc = tb->pc;
     cs_base = tb->cs_base;
     flags = tb->flags;
diff --git a/exec.c b/exec.c
index 01ac21e..94b0f3e 100644
--- a/exec.c
+++ b/exec.c
@@ -2433,7 +2433,7 @@  static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
                     cpu_loop_exit(cpu);
                 } else {
                     cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
-                    tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
+                    tb_gen_code(cpu, pc, cs_base, cpu_flags, 1 | curr_cflags());
                     cpu_loop_exit_noexc(cpu);
                 }
             }
diff --git a/tcg/tcg-runtime.c b/tcg/tcg-runtime.c
index 7100339..4f873a9 100644
--- a/tcg/tcg-runtime.c
+++ b/tcg/tcg-runtime.c
@@ -151,7 +151,7 @@  void *HELPER(lookup_tb_ptr)(CPUArchState *env)
     target_ulong cs_base, pc;
     uint32_t flags;
 
-    tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags);
+    tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, curr_cflags());
     if (tb == NULL) {
         return tcg_ctx.code_gen_epilogue;
     }
diff --git a/tests/qht-bench.c b/tests/qht-bench.c
index 11c1cec..4cabdfd 100644
--- a/tests/qht-bench.c
+++ b/tests/qht-bench.c
@@ -103,7 +103,7 @@  static bool is_equal(const void *obj, const void *userp)
 
 static inline uint32_t h(unsigned long v)
 {
-    return tb_hash_func6(v, 0, 0, 0);
+    return tb_hash_func7(v, 0, 0, 0, 0);
 }
 
 /*