diff mbox series

[v3,37/39] tcg/aarch64: Use LDP to load tlb mask+table

Message ID 20190508000641.19090-38-richard.henderson@linaro.org
State New
Headers show
Series tcg: Move the softmmu tlb to CPUNegativeOffsetState | expand

Commit Message

Richard Henderson May 8, 2019, 12:06 a.m. UTC
This merges the two separate loads of the TLB mask and table into a
single LDP, changing the code generated for the TLB lookup from e.g.

	ldur     x0, [x19, #0xffffffffffffffe0]
	ldur     x1, [x19, #0xffffffffffffffe8]
	and      x0, x0, x20, lsr #8
	add      x1, x1, x0
	ldr      x0, [x1]
	ldr      x1, [x1, #0x18]

to

	ldp      x0, x1, [x19, #-0x20]
	and      x0, x0, x20, lsr #8
	add      x1, x1, x0
	ldr      x0, [x1]
	ldr      x1, [x1, #0x18]

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
v3: Add QEMU_BUILD_BUG_ON for mask/table ordering; comment fixes.
---
 tcg/aarch64/tcg-target.inc.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

Comments

Alistair Francis May 10, 2019, 9:01 p.m. UTC | #1
On Tue, May 7, 2019 at 5:29 PM Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> This changes the code generation for the tlb from e.g.
>
>         ldur     x0, [x19, #0xffffffffffffffe0]
>         ldur     x1, [x19, #0xffffffffffffffe8]
>         and      x0, x0, x20, lsr #8
>         add      x1, x1, x0
>         ldr      x0, [x1]
>         ldr      x1, [x1, #0x18]
>
> to
>
>         ldp      x0, x1, [x19, #-0x20]
>         and      x0, x0, x20, lsr #8
>         add      x1, x1, x0
>         ldr      x0, [x1]
>         ldr      x1, [x1, #0x18]
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Acked-by: Alistair Francis <alistair.francis@wdc.com>

Alistair

> ---
> v3: Add QEMU_BUILD_BUG_ON for mask/table ordering; comment fixes.
> ---
>  tcg/aarch64/tcg-target.inc.c | 15 ++++++++-------
>  1 file changed, 8 insertions(+), 7 deletions(-)
>
> diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
> index afac018301..2863a371a0 100644
> --- a/tcg/aarch64/tcg-target.inc.c
> +++ b/tcg/aarch64/tcg-target.inc.c
> @@ -1459,6 +1459,10 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
>  QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
>  QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
>
> +/* These offsets are built into the LDP below.  */
> +QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
> +QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
> +
>  /* Load and compare a TLB entry, emitting the conditional jump to the
>     slow path for the failure case, which will be patched later when finalizing
>     the slow path. Generated code returns the host addend in X1,
> @@ -1467,23 +1471,20 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
>                               tcg_insn_unit **label_ptr, int mem_index,
>                               bool is_read)
>  {
> -    int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
> -    int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
> -    int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
>      unsigned a_bits = get_alignment_bits(opc);
>      unsigned s_bits = opc & MO_SIZE;
>      unsigned a_mask = (1u << a_bits) - 1;
>      unsigned s_mask = (1u << s_bits) - 1;
> -    TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0, x3;
> +    TCGReg x3;
>      TCGType mask_type;
>      uint64_t compare_mask;
>
>      mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
>                   ? TCG_TYPE_I64 : TCG_TYPE_I32);
>
> -    /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
> -    tcg_out_ld(s, mask_type, TCG_REG_X0, mask_base, mask_ofs);
> -    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, table_base, table_ofs);
> +    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}.  */
> +    tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
> +                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);
>
>      /* Extract the TLB index from the address into X0.  */
>      tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
> --
> 2.17.1
>
>
diff mbox series

Patch

diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index afac018301..2863a371a0 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -1459,6 +1459,10 @@  static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
 
+/* These offsets are built into the LDP below.  */
+QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
+QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
+
 /* Load and compare a TLB entry, emitting the conditional jump to the
    slow path for the failure case, which will be patched later when finalizing
    the slow path. Generated code returns the host addend in X1,
@@ -1467,23 +1471,20 @@  static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
                              tcg_insn_unit **label_ptr, int mem_index,
                              bool is_read)
 {
-    int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
-    int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
-    int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
     unsigned a_bits = get_alignment_bits(opc);
     unsigned s_bits = opc & MO_SIZE;
     unsigned a_mask = (1u << a_bits) - 1;
     unsigned s_mask = (1u << s_bits) - 1;
-    TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0, x3;
+    TCGReg x3;
     TCGType mask_type;
     uint64_t compare_mask;
 
     mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
                  ? TCG_TYPE_I64 : TCG_TYPE_I32);
 
-    /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
-    tcg_out_ld(s, mask_type, TCG_REG_X0, mask_base, mask_ofs);
-    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, table_base, table_ofs);
+    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}.  */
+    tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
+                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);
 
     /* Extract the TLB index from the address into X0.  */
     tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,