diff mbox

[07/26] tcg-aarch64: Use adrp in tcg_out_movi

Message ID 1394851732-25692-8-git-send-email-rth@twiddle.net
State New
Headers show

Commit Message

Richard Henderson March 15, 2014, 2:48 a.m. UTC
Loading a QEMU pointer as an immediate happens often.  E.g.

- exit_tb $0x7fa8140013
+ exit_tb $0x7f81ee0013
...
- :  d2800260        mov     x0, #0x13
- :  f2b50280        movk    x0, #0xa814, lsl #16
- :  f2c00fe0        movk    x0, #0x7f, lsl #32
+ :  90ff1000        adrp    x0, 0x7f81ee0000
+ :  91004c00        add     x0, x0, #0x13

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/aarch64/tcg-target.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

Comments

Claudio Fontana March 24, 2014, 2:05 p.m. UTC | #1
On 15.03.2014 03:48, Richard Henderson wrote:
> Loading an qemu pointer as an immediate happens often.  E.g.
> 
> - exit_tb $0x7fa8140013
> + exit_tb $0x7f81ee0013
> ...
> - :  d2800260        mov     x0, #0x13
> - :  f2b50280        movk    x0, #0xa814, lsl #16
> - :  f2c00fe0        movk    x0, #0x7f, lsl #32
> + :  90ff1000        adrp    x0, 0x7f81ee0000
> + :  91004c00        add     x0, x0, #0x13
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/aarch64/tcg-target.c | 23 ++++++++++++++++++++++-
>  1 file changed, 22 insertions(+), 1 deletion(-)
> 
> diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
> index 417f51a..4944eb6 100644
> --- a/tcg/aarch64/tcg-target.c
> +++ b/tcg/aarch64/tcg-target.c
> @@ -294,6 +294,10 @@ typedef enum {
>      I3405_MOVZ      = 0x52800000,
>      I3405_MOVK      = 0x72800000,
>  
> +    /* PC relative addressing instructions.  */
> +    I3406_ADR       = 0x10000000,
> +    I3406_ADRP      = 0x90000000,
> +
>      /* Add/subtract shifted register instructions (without a shift).  */
>      I3502_ADD       = 0x0b000000,
>      I3502_ADDS      = 0x2b000000,
> @@ -457,6 +461,12 @@ static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
>      tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
>  }
>  
> +static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
> +                              TCGReg rd, int64_t disp)
> +{
> +    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
> +}
> +
>  /* This function is for both 3.5.2 (Add/Subtract shifted register), for
>     the rare occasion when we actually want to supply a shift amount.  */
>  static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
> @@ -564,7 +574,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
>      AArch64Insn insn;
>      int i, wantinv, shift;
>      tcg_target_long svalue = value;
> -    tcg_target_long ivalue, imask;
> +    tcg_target_long ivalue, imask, disp;
>  
>      /* For 32-bit values, discard potential garbage in value.  For 64-bit
>         values within [2**31, 2**32-1], we can create smaller sequences by
> @@ -595,6 +605,17 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
>          return;
>      }
>  
> +    /* Look for host pointer values within 4G of the PC.  This happens
> +       often when loading pointers to QEMU's own data structures.  */
> +    disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
> +    if (disp == sextract64(disp, 0, 21)) {

nit.. for the check to be correct in all cases, the assumption here is that intptr_t is the same size as a signed target long; would a cast to tcg_target_long instead of intptr_t be "safer"?

> +        tcg_out_insn(s, 3406, ADRP, rd, disp);
> +        if (value & 0xfff) {
> +            tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
> +        }
> +        return;
> +    }
> +
>      /* Would it take fewer insns to begin with MOVN?  For the value and its
>         inverse, count the number of 16-bit lanes that are 0.  */
>      for (i = wantinv = imask = 0; i < (32 << type); i += 16) {
>
Richard Henderson March 24, 2014, 3:36 p.m. UTC | #2
On 03/24/2014 07:05 AM, Claudio Fontana wrote:
>> > +    /* Look for host pointer values within 4G of the PC.  This happens
>> > +       often when loading pointers to QEMU's own data structures.  */
>> > +    disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
>> > +    if (disp == sextract64(disp, 0, 21)) {
>
> nit.. for the check to be correct in all cases, the assumption here is that
> intptr_t is the same size as a signed target long; would a cast to
> tcg_target_long instead of intptr_t be "safer"?
> 

I don't think so.

GCC 4.9 supports an ILP32 ABI (-mabi=ilp32) for aarch64.  Suppose we were to
compile qemu this way.  In that case tcg_target_long would be larger than
intptr_t, and the cast here would trigger a pointer-to-integer size-mismatch
warning, which is fatal under -Werror.  But leaving it as intptr_t, we get a
proper sign-extension with type promotion to tcg_target_long, and the
arithmetic will in fact work as expected.


r~
Claudio Fontana March 26, 2014, 9:34 a.m. UTC | #3
On 15.03.2014 03:48, Richard Henderson wrote:
> Loading an qemu pointer as an immediate happens often.  E.g.
> 
> - exit_tb $0x7fa8140013
> + exit_tb $0x7f81ee0013
> ...
> - :  d2800260        mov     x0, #0x13
> - :  f2b50280        movk    x0, #0xa814, lsl #16
> - :  f2c00fe0        movk    x0, #0x7f, lsl #32
> + :  90ff1000        adrp    x0, 0x7f81ee0000
> + :  91004c00        add     x0, x0, #0x13
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/aarch64/tcg-target.c | 23 ++++++++++++++++++++++-
>  1 file changed, 22 insertions(+), 1 deletion(-)
> 
> diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
> index 417f51a..4944eb6 100644
> --- a/tcg/aarch64/tcg-target.c
> +++ b/tcg/aarch64/tcg-target.c
> @@ -294,6 +294,10 @@ typedef enum {
>      I3405_MOVZ      = 0x52800000,
>      I3405_MOVK      = 0x72800000,
>  
> +    /* PC relative addressing instructions.  */
> +    I3406_ADR       = 0x10000000,
> +    I3406_ADRP      = 0x90000000,
> +
>      /* Add/subtract shifted register instructions (without a shift).  */
>      I3502_ADD       = 0x0b000000,
>      I3502_ADDS      = 0x2b000000,
> @@ -457,6 +461,12 @@ static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
>      tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
>  }
>  
> +static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
> +                              TCGReg rd, int64_t disp)
> +{
> +    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
> +}
> +
>  /* This function is for both 3.5.2 (Add/Subtract shifted register), for
>     the rare occasion when we actually want to supply a shift amount.  */
>  static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
> @@ -564,7 +574,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
>      AArch64Insn insn;
>      int i, wantinv, shift;
>      tcg_target_long svalue = value;
> -    tcg_target_long ivalue, imask;
> +    tcg_target_long ivalue, imask, disp;
>  
>      /* For 32-bit values, discard potential garbage in value.  For 64-bit
>         values within [2**31, 2**32-1], we can create smaller sequences by
> @@ -595,6 +605,17 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
>          return;
>      }
>  
> +    /* Look for host pointer values within 4G of the PC.  This happens
> +       often when loading pointers to QEMU's own data structures.  */
> +    disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
> +    if (disp == sextract64(disp, 0, 21)) {
> +        tcg_out_insn(s, 3406, ADRP, rd, disp);
> +        if (value & 0xfff) {
> +            tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
> +        }
> +        return;
> +    }
> +
>      /* Would it take fewer insns to begin with MOVN?  For the value and its
>         inverse, count the number of 16-bit lanes that are 0.  */
>      for (i = wantinv = imask = 0; i < (32 << type); i += 16) {
> 

Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
diff mbox

Patch

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 417f51a..4944eb6 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -294,6 +294,10 @@  typedef enum {
     I3405_MOVZ      = 0x52800000,
     I3405_MOVK      = 0x72800000,
 
+    /* PC relative addressing instructions.  */
+    I3406_ADR       = 0x10000000,
+    I3406_ADRP      = 0x90000000,
+
     /* Add/subtract shifted register instructions (without a shift).  */
     I3502_ADD       = 0x0b000000,
     I3502_ADDS      = 0x2b000000,
@@ -457,6 +461,12 @@  static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
     tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
 }
 
+static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
+                              TCGReg rd, int64_t disp)
+{
+    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
+}
+
 /* This function is for both 3.5.2 (Add/Subtract shifted register), for
    the rare occasion when we actually want to supply a shift amount.  */
 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
@@ -564,7 +574,7 @@  static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
     AArch64Insn insn;
     int i, wantinv, shift;
     tcg_target_long svalue = value;
-    tcg_target_long ivalue, imask;
+    tcg_target_long ivalue, imask, disp;
 
     /* For 32-bit values, discard potential garbage in value.  For 64-bit
        values within [2**31, 2**32-1], we can create smaller sequences by
@@ -595,6 +605,17 @@  static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
         return;
     }
 
+    /* Look for host pointer values within 4G of the PC.  This happens
+       often when loading pointers to QEMU's own data structures.  */
+    disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
+    if (disp == sextract64(disp, 0, 21)) {
+        tcg_out_insn(s, 3406, ADRP, rd, disp);
+        if (value & 0xfff) {
+            tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
+        }
+        return;
+    }
+
     /* Would it take fewer insns to begin with MOVN?  For the value and its
        inverse, count the number of 16-bit lanes that are 0.  */
     for (i = wantinv = imask = 0; i < (32 << type); i += 16) {