Patchwork [v3,19/29] tcg-aarch64: Introduce tcg_fmt_Rd_uimm_s

login
register
mail settings
Submitter Richard Henderson
Date Sept. 2, 2013, 5:54 p.m.
Message ID <1378144503-15808-20-git-send-email-rth@twiddle.net>
Download mbox | patch
Permalink /patch/272033/
State New
Headers show

Comments

Richard Henderson - Sept. 2, 2013, 5:54 p.m.
Cleaning up the implementation of tcg_out_movi at the same time.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/aarch64/tcg-target.c | 48 ++++++++++++++++++++++--------------------------
 1 file changed, 22 insertions(+), 26 deletions(-)
Claudio Fontana - Sept. 5, 2013, 1:32 p.m.
Hello Richard,

thanks for your prolific work. A few comments below for starters:

On 02.09.2013 19:54, Richard Henderson wrote:
> Cleaning up the implementation of tcg_out_movi at the same time.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/aarch64/tcg-target.c | 48 ++++++++++++++++++++++--------------------------
>  1 file changed, 22 insertions(+), 26 deletions(-)
> 
> diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
> index 09ccd67..59e5026 100644
> --- a/tcg/aarch64/tcg-target.c
> +++ b/tcg/aarch64/tcg-target.c
> @@ -274,6 +274,11 @@ typedef enum {
>      INSN_EOR   = 0x4a000000,
>      INSN_EON   = 0x4a200000,
>  
> +    /* Move wide immediate instructions */
> +    INSN_MOVN  = 0x12800000,
> +    INSN_MOVZ  = 0x52800000,
> +    INSN_MOVK  = 0x72800000,
> +
>      /* Add/subtract immediate instructions */
>      INSN_ADDI  = 0x11000000,
>      INSN_ADDSI = 0x31000000,
> @@ -478,6 +483,12 @@ static inline void tcg_fmt_Rdnm_cond(TCGContext *s, AArch64Insn insn,
>                | tcg_cond_to_aarch64[c] << 12);
>  }
>  
> +static inline void tcg_fmt_Rd_uimm_s(TCGContext *s, AArch64Insn insn, bool ext,
> +                                     TCGReg rd, uint16_t half, unsigned shift)
> +{
> +    tcg_out32(s, insn | ext << 31 | shift << 17 | half << 5 | rd);
> +}
> +
>  static inline void tcg_out_ldst_9(TCGContext *s,
>                                    enum aarch64_ldst_op_data op_data,
>                                    enum aarch64_ldst_op_type op_type,
> @@ -522,38 +533,23 @@ static inline void tcg_out_movr_sp(TCGContext *s, bool ext,
>      tcg_fmt_Rdn_aimm(s, INSN_ADDI, ext, rd, rn, 0);
>  }
>  
> -static inline void tcg_out_movi_aux(TCGContext *s,
> -                                    TCGReg rd, uint64_t value)
> +static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
> +                         tcg_target_long value)
>  {
> -    uint32_t half, base, shift, movk = 0;
> -    /* construct halfwords of the immediate with MOVZ/MOVK with LSL */
> -    /* using MOVZ 0x52800000 | extended reg.. */
> -    base = (value > 0xffffffff) ? 0xd2800000 : 0x52800000;
> -    /* count trailing zeros in 16 bit steps, mapping 64 to 0. Emit the
> -       first MOVZ with the half-word immediate skipping the zeros, with a shift
> -       (LSL) equal to this number. Then morph all next instructions into MOVKs.
> -       Zero the processed half-word in the value, continue until empty.
> -       We build the final result 16bits at a time with up to 4 instructions,
> -       but do not emit instructions for 16bit zero holes. */

Please do not remove these comments.
In my judgement this part of the code profits from some verbose clarification.
What is happening might be obvious to you, but not to others trying to step in.

> +    AArch64Insn insn = INSN_MOVZ;
> +
> +    if (type == TCG_TYPE_I32) {
> +        value = (uint32_t)value;
> +    }
> +
>      do {
> -        shift = ctz64(value) & (63 & -16);
> -        half = (value >> shift) & 0xffff;
> -        tcg_out32(s, base | movk | shift << 17 | half << 5 | rd);
> -        movk = 0x20000000; /* morph next MOVZs into MOVKs */
> +        unsigned shift = ctz64(value) & (63 & -16);
> +        tcg_fmt_Rd_uimm_s(s, insn, shift >= 32, rd, value >> shift, shift);
>          value &= ~(0xffffUL << shift);
> +        insn = INSN_MOVK;
>      } while (value);
>  }
>  
> -static inline void tcg_out_movi(TCGContext *s, TCGType type,
> -                                TCGReg rd, tcg_target_long value)
> -{
> -    if (type == TCG_TYPE_I64) {
> -        tcg_out_movi_aux(s, rd, value);
> -    } else {
> -        tcg_out_movi_aux(s, rd, value & 0xffffffff);
> -    }
> -}
> -
>  static inline void tcg_out_ldst_r(TCGContext *s,
>                                    enum aarch64_ldst_op_data op_data,
>                                    enum aarch64_ldst_op_type op_type,
> 


Note that the movi change you introduce with the combination of patches 19 and 20 is not correct, breaks all targets I tried.
I will dig in the details tomorrow commenting patch 20.

In general I'd prefer to keep movi as it was (functionally-wise) for the time being, replacing it with a more efficient version once we can get some numbers (which will be soon) with which to justify (or not) the added code complexity.

But using the INSN_* you introduced instead of inline numbers is of course fine for me.

Claudio
Richard Henderson - Sept. 5, 2013, 3:41 p.m.
On 09/05/2013 06:32 AM, Claudio Fontana wrote:
>>  {
>> -    uint32_t half, base, shift, movk = 0;
>> -    /* construct halfwords of the immediate with MOVZ/MOVK with LSL */
>> -    /* using MOVZ 0x52800000 | extended reg.. */
>> -    base = (value > 0xffffffff) ? 0xd2800000 : 0x52800000;
>> -    /* count trailing zeros in 16 bit steps, mapping 64 to 0. Emit the
>> -       first MOVZ with the half-word immediate skipping the zeros, with a shift
>> -       (LSL) equal to this number. Then morph all next instructions into MOVKs.
>> -       Zero the processed half-word in the value, continue until empty.
>> -       We build the final result 16bits at a time with up to 4 instructions,
>> -       but do not emit instructions for 16bit zero holes. */
> 
> Please do not remove these comments.
> In my judgement this part of the code profits from some verbose clarification.
> What is happening might be obvious to you, but not to others trying to step in.

Fair enough.

> In general I'd prefer to keep movi as it was (functionally-wise) for the
> time being, replacing it with a more efficient version once we can get some
> numbers (which will be soon) with which to justify (or not) the added code
> complexity.

The most important thing we're not doing at the moment is handling negative
numbers efficiently.  E.g. we're using 4 insns to load -1.



r~
Claudio Fontana - Sept. 6, 2013, 9:06 a.m.
On 05.09.2013 17:41, Richard Henderson wrote:
> On 09/05/2013 06:32 AM, Claudio Fontana wrote:
>>>  {
>>> -    uint32_t half, base, shift, movk = 0;
>>> -    /* construct halfwords of the immediate with MOVZ/MOVK with LSL */
>>> -    /* using MOVZ 0x52800000 | extended reg.. */
>>> -    base = (value > 0xffffffff) ? 0xd2800000 : 0x52800000;
>>> -    /* count trailing zeros in 16 bit steps, mapping 64 to 0. Emit the
>>> -       first MOVZ with the half-word immediate skipping the zeros, with a shift
>>> -       (LSL) equal to this number. Then morph all next instructions into MOVKs.
>>> -       Zero the processed half-word in the value, continue until empty.
>>> -       We build the final result 16bits at a time with up to 4 instructions,
>>> -       but do not emit instructions for 16bit zero holes. */
>>
>> Please do not remove these comments.
>> In my judgement this part of the code profits from some verbose clarification.
>> What is happening might be obvious to you, but not to others trying to step in.
> 
> Fair enough.
> 
>> In general I'd prefer to keep movi as it was (functionally-wise) for the
>> time being, replacing it with a more efficient version once we can get some
>> numbers (which will be soon) with which to justify (or not) the added code
>> complexity.
> 
> The most important thing we're not doing at the moment is handling negative
> numbers efficiently.  E.g. we're using 4 insns to load -1.

Ok, let's address that point specifically then.

> r~
> 

Claudio

Patch

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 09ccd67..59e5026 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -274,6 +274,11 @@  typedef enum {
     INSN_EOR   = 0x4a000000,
     INSN_EON   = 0x4a200000,
 
+    /* Move wide immediate instructions */
+    INSN_MOVN  = 0x12800000,
+    INSN_MOVZ  = 0x52800000,
+    INSN_MOVK  = 0x72800000,
+
     /* Add/subtract immediate instructions */
     INSN_ADDI  = 0x11000000,
     INSN_ADDSI = 0x31000000,
@@ -478,6 +483,12 @@  static inline void tcg_fmt_Rdnm_cond(TCGContext *s, AArch64Insn insn,
               | tcg_cond_to_aarch64[c] << 12);
 }
 
+static inline void tcg_fmt_Rd_uimm_s(TCGContext *s, AArch64Insn insn, bool ext,
+                                     TCGReg rd, uint16_t half, unsigned shift)
+{
+    tcg_out32(s, insn | ext << 31 | shift << 17 | half << 5 | rd);
+}
+
 static inline void tcg_out_ldst_9(TCGContext *s,
                                   enum aarch64_ldst_op_data op_data,
                                   enum aarch64_ldst_op_type op_type,
@@ -522,38 +533,23 @@  static inline void tcg_out_movr_sp(TCGContext *s, bool ext,
     tcg_fmt_Rdn_aimm(s, INSN_ADDI, ext, rd, rn, 0);
 }
 
-static inline void tcg_out_movi_aux(TCGContext *s,
-                                    TCGReg rd, uint64_t value)
+static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
+                         tcg_target_long value)
 {
-    uint32_t half, base, shift, movk = 0;
-    /* construct halfwords of the immediate with MOVZ/MOVK with LSL */
-    /* using MOVZ 0x52800000 | extended reg.. */
-    base = (value > 0xffffffff) ? 0xd2800000 : 0x52800000;
-    /* count trailing zeros in 16 bit steps, mapping 64 to 0. Emit the
-       first MOVZ with the half-word immediate skipping the zeros, with a shift
-       (LSL) equal to this number. Then morph all next instructions into MOVKs.
-       Zero the processed half-word in the value, continue until empty.
-       We build the final result 16bits at a time with up to 4 instructions,
-       but do not emit instructions for 16bit zero holes. */
+    AArch64Insn insn = INSN_MOVZ;
+
+    if (type == TCG_TYPE_I32) {
+        value = (uint32_t)value;
+    }
+
     do {
-        shift = ctz64(value) & (63 & -16);
-        half = (value >> shift) & 0xffff;
-        tcg_out32(s, base | movk | shift << 17 | half << 5 | rd);
-        movk = 0x20000000; /* morph next MOVZs into MOVKs */
+        unsigned shift = ctz64(value) & (63 & -16);
+        tcg_fmt_Rd_uimm_s(s, insn, shift >= 32, rd, value >> shift, shift);
         value &= ~(0xffffUL << shift);
+        insn = INSN_MOVK;
     } while (value);
 }
 
-static inline void tcg_out_movi(TCGContext *s, TCGType type,
-                                TCGReg rd, tcg_target_long value)
-{
-    if (type == TCG_TYPE_I64) {
-        tcg_out_movi_aux(s, rd, value);
-    } else {
-        tcg_out_movi_aux(s, rd, value & 0xffffffff);
-    }
-}
-
 static inline void tcg_out_ldst_r(TCGContext *s,
                                   enum aarch64_ldst_op_data op_data,
                                   enum aarch64_ldst_op_type op_type,