[15/26] tcg-aarch64: Avoid add with zero in tlb load

Message ID 1394851732-25692-16-git-send-email-rth@twiddle.net
State New

Commit Message

Richard Henderson March 15, 2014, 2:48 a.m. UTC
Some guest env are small enough to reach the tlb with only a 12-bit addition.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/aarch64/tcg-target.c | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)
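
For context, here is a minimal standalone sketch (not part of the patch; split_tlb_offset is a hypothetical helper) of how the TLB offset splits between the ADDI "LSL #12" immediate and the load's 12-bit offset. When the high part is zero, the ADDI would add zero and can be dropped, which is the case this patch optimizes:

    /* Illustrative sketch only, not QEMU code.  AArch64 ADDI takes a
       12-bit unsigned immediate, optionally shifted left by 12, and the
       LDR immediate offset covers the low 12 bits, so a TLB-entry offset
       within env is handled in at most two pieces. */
    #include <stdio.h>

    static void split_tlb_offset(int tlb_offset, int *high, int *low)
    {
        *high = tlb_offset & 0xfff000;  /* bits [23:12]: ADDI Xd, Xn, #imm, LSL #12 */
        *low  = tlb_offset & 0x000fff;  /* bits [11:0]: folded into the load offset */
    }

    int main(void)
    {
        int high, low;
        /* e.g. a small offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) */
        split_tlb_offset(0x8f0, &high, &low);
        printf("high=%#x low=%#x -> ADDI needed: %s\n",
               high, low, high ? "yes" : "no");
        return 0;
    }

When the ADDI is skipped, the later ADD_LSL that merges the TLB index can use env (TCG_AREG0) directly as its base register, saving one instruction per guest memory access on such targets.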

Comments

Claudio Fontana March 26, 2014, 9:36 a.m. UTC | #1
On 15.03.2014 03:48, Richard Henderson wrote:
> Some guest env are small enough to reach the tlb with only a 12-bit addition.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/aarch64/tcg-target.c | 28 +++++++++++++++++++---------
>  1 file changed, 19 insertions(+), 9 deletions(-)
> 
> diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
> index aaa5f09..34eee6a 100644
> --- a/tcg/aarch64/tcg-target.c
> +++ b/tcg/aarch64/tcg-target.c
> @@ -1119,47 +1119,57 @@ static void add_qemu_ldst_label(TCGContext *s, int is_ld, int opc,
>     slow path for the failure case, which will be patched later when finalizing
>     the slow path. Generated code returns the host addend in X1,
>     clobbers X0,X2,X3,TMP. */
> -static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg,
> -            int s_bits, uint8_t **label_ptr, int mem_index, int is_read)
> +static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, int s_bits,
> +                             uint8_t **label_ptr, int mem_index, bool is_read)
>  {
>      TCGReg base = TCG_AREG0;
>      int tlb_offset = is_read ?
>          offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
>          : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
> +
>      /* Extract the TLB index from the address into X0.
>         X0<CPU_TLB_BITS:0> =
>         addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
> -    tcg_out_ubfm(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, addr_reg,
> +    tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg,
>                   TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
> +
>      /* Store the page mask part of the address and the low s_bits into X3.
>         Later this allows checking for equality and alignment at the same time.
>         X3 = addr_reg & (PAGE_MASK | ((1 << s_bits) - 1)) */
>      tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, TCG_REG_X3,
>                       addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
> +
>      /* Add any "high bits" from the tlb offset to the env address into X2,
>         to take advantage of the LSL12 form of the ADDI instruction.
>         X2 = env + (tlb_offset & 0xfff000) */
> -    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
> -                 tlb_offset & 0xfff000);
> +    if (tlb_offset & 0xfff000) {
> +        tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
> +                     tlb_offset & 0xfff000);
> +        base = TCG_REG_X2;
> +    }
> +
>      /* Merge the tlb index contribution into X2.
>         X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
> -    tcg_out_insn(s, 3502S, ADD_LSL, 1, TCG_REG_X2, TCG_REG_X2,
> +    tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base,
>                   TCG_REG_X0, CPU_TLB_ENTRY_BITS);
> +
>      /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
>         X0 = load [X2 + (tlb_offset & 0x000fff)] */
>      tcg_out_ldst(s, TARGET_LONG_BITS == 64 ? LDST_64 : LDST_32,
> -                 LDST_LD, TCG_REG_X0, TCG_REG_X2,
> -                 (tlb_offset & 0xfff));
> +                 LDST_LD, TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff);
> +
>      /* Load the tlb addend. Do that early to avoid stalling.
>         X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
>      tcg_out_ldst(s, LDST_64, LDST_LD, TCG_REG_X1, TCG_REG_X2,
>                   (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
>                   (is_read ? offsetof(CPUTLBEntry, addr_read)
>                    : offsetof(CPUTLBEntry, addr_write)));
> +
>      /* Perform the address comparison. */
>      tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
> -    *label_ptr = s->code_ptr;
> +
>      /* If not equal, we jump to the slow path. */
> +    *label_ptr = s->code_ptr;
>      tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
>  }
>  
> 

Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>

Patch

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index aaa5f09..34eee6a 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -1119,47 +1119,57 @@  static void add_qemu_ldst_label(TCGContext *s, int is_ld, int opc,
    slow path for the failure case, which will be patched later when finalizing
    the slow path. Generated code returns the host addend in X1,
    clobbers X0,X2,X3,TMP. */
-static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg,
-            int s_bits, uint8_t **label_ptr, int mem_index, int is_read)
+static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, int s_bits,
+                             uint8_t **label_ptr, int mem_index, bool is_read)
 {
     TCGReg base = TCG_AREG0;
     int tlb_offset = is_read ?
         offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
         : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
+
     /* Extract the TLB index from the address into X0.
        X0<CPU_TLB_BITS:0> =
        addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
-    tcg_out_ubfm(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, addr_reg,
+    tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg,
                  TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
+
     /* Store the page mask part of the address and the low s_bits into X3.
        Later this allows checking for equality and alignment at the same time.
        X3 = addr_reg & (PAGE_MASK | ((1 << s_bits) - 1)) */
     tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, TCG_REG_X3,
                      addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
+
     /* Add any "high bits" from the tlb offset to the env address into X2,
        to take advantage of the LSL12 form of the ADDI instruction.
        X2 = env + (tlb_offset & 0xfff000) */
-    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
-                 tlb_offset & 0xfff000);
+    if (tlb_offset & 0xfff000) {
+        tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
+                     tlb_offset & 0xfff000);
+        base = TCG_REG_X2;
+    }
+
     /* Merge the tlb index contribution into X2.
        X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
-    tcg_out_insn(s, 3502S, ADD_LSL, 1, TCG_REG_X2, TCG_REG_X2,
+    tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base,
                  TCG_REG_X0, CPU_TLB_ENTRY_BITS);
+
     /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
        X0 = load [X2 + (tlb_offset & 0x000fff)] */
     tcg_out_ldst(s, TARGET_LONG_BITS == 64 ? LDST_64 : LDST_32,
-                 LDST_LD, TCG_REG_X0, TCG_REG_X2,
-                 (tlb_offset & 0xfff));
+                 LDST_LD, TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff);
+
     /* Load the tlb addend. Do that early to avoid stalling.
        X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
     tcg_out_ldst(s, LDST_64, LDST_LD, TCG_REG_X1, TCG_REG_X2,
                  (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
                  (is_read ? offsetof(CPUTLBEntry, addr_read)
                   : offsetof(CPUTLBEntry, addr_write)));
+
     /* Perform the address comparison. */
     tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
-    *label_ptr = s->code_ptr;
+
     /* If not equal, we jump to the slow path. */
+    *label_ptr = s->code_ptr;
     tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
 }