Patchwork [16/35] tcg-s390: Re-implement tcg_out_movi.

login
register
mail settings
Submitter Richard Henderson
Date June 4, 2010, 7:14 p.m.
Message ID <1275678883-7082-17-git-send-email-rth@twiddle.net>
Download mbox | patch
Permalink /patch/54683/
State New
Headers show

Comments

Richard Henderson - June 4, 2010, 7:14 p.m.
Make better use of the LOAD HALFWORD IMMEDIATE, LOAD IMMEDIATE,
and INSERT IMMEDIATE instruction groups.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/s390/tcg-target.c |  129 +++++++++++++++++++++++++++++++++++++++++++------
 1 files changed, 113 insertions(+), 16 deletions(-)
Aurelien Jarno - June 12, 2010, 12:04 p.m.
On Fri, Jun 04, 2010 at 12:14:24PM -0700, Richard Henderson wrote:
> Make better use of the LOAD HALFWORD IMMEDIATE, LOAD IMMEDIATE,
> and INSERT IMMEDIATE instruction groups.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/s390/tcg-target.c |  129 +++++++++++++++++++++++++++++++++++++++++++------
>  1 files changed, 113 insertions(+), 16 deletions(-)
> 
> diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
> index d99bb5c..71e017a 100644
> --- a/tcg/s390/tcg-target.c
> +++ b/tcg/s390/tcg-target.c
> @@ -52,12 +52,23 @@ typedef enum S390Opcode {
>      RIL_BRASL   = 0xc005,
>      RIL_BRCL    = 0xc004,
>      RIL_LARL    = 0xc000,
> +    RIL_IIHF    = 0xc008,
> +    RIL_IILF    = 0xc009,
> +    RIL_LGFI    = 0xc001,
> +    RIL_LLIHF   = 0xc00e,
> +    RIL_LLILF   = 0xc00f,
>  
>      RI_AGHI     = 0xa70b,
>      RI_AHI      = 0xa70a,
>      RI_BRC      = 0xa704,
> +    RI_IIHH     = 0xa500,
> +    RI_IIHL     = 0xa501,
>      RI_IILH     = 0xa502,
> +    RI_IILL     = 0xa503,
>      RI_LGHI     = 0xa709,
> +    RI_LLIHH    = 0xa50c,
> +    RI_LLIHL    = 0xa50d,
> +    RI_LLILH    = 0xa50e,
>      RI_LLILL    = 0xa50f,
>  
>      RRE_AGR     = 0xb908,
> @@ -382,24 +393,110 @@ static inline void tcg_out_mov(TCGContext *s, int ret, int arg)
>  }
>  
>  /* load a register with an immediate value */
> -static inline void tcg_out_movi(TCGContext *s, TCGType type,
> -                int ret, tcg_target_long arg)
> +static void tcg_out_movi(TCGContext *s, TCGType type,
> +                         TCGReg ret, tcg_target_long sval)
>  {
> -    if (arg >= -0x8000 && arg < 0x8000) { /* signed immediate load */
> -        tcg_out_insn(s, RI, LGHI, ret, arg);
> -    } else if (!(arg & 0xffffffffffff0000UL)) {
> -        tcg_out_insn(s, RI, LLILL, ret, arg);
> -    } else if (!(arg & 0xffffffff00000000UL) || type == TCG_TYPE_I32) {
> -        tcg_out_insn(s, RI, LLILL, ret, arg);
> -        tcg_out_insn(s, RI, IILH, ret, arg >> 16);
> +    static const S390Opcode lli_insns[4] = {
> +        RI_LLILL, RI_LLILH, RI_LLIHL, RI_LLIHH
> +    };
> +
> +    tcg_target_ulong uval = sval;
> +    int i;
> +
> +    if (type == TCG_TYPE_I32) {
> +        uval = (uint32_t)sval;
> +        sval = (int32_t)sval;
> +    }
> +
> +    /* Try all 32-bit insns that can load it in one go.  */
> +    if (sval >= -0x8000 && sval < 0x8000) {
> +        tcg_out_insn(s, RI, LGHI, ret, sval);
> +        return;
> +    }
> +
> +    for (i = 0; i < 4; i++) {
> +        tcg_target_long mask = 0xffffull << i*16;
> +        if ((uval & mask) != 0 && (uval & ~mask) == 0) {

Wouldn't it be simpler to use (uval & mask) == uval?

> +            tcg_out_insn_RI(s, lli_insns[i], ret, uval >> i*16);
> +            return;
> +        }
> +    }
> +
> +    /* Try all 48-bit insns that can load it in one go.  */
> +    if (facilities & FACILITY_EXT_IMM) {
> +        if (sval == (int32_t)sval) {
> +            tcg_out_insn(s, RIL, LGFI, ret, sval);
> +            return;
> +        }
> +        if (uval <= 0xffffffff) {
> +            tcg_out_insn(s, RIL, LLILF, ret, uval);
> +            return;
> +        }
> +        if ((uval & 0xffffffff) == 0) {
> +            tcg_out_insn(s, RIL, LLIHF, ret, uval >> 32);
> +            return;
> +        }
> +    }
> +
> +    /* Try for PC-relative address load.  */
> +    if ((sval & 1) == 0) {
> +        intptr_t off = (sval - (intptr_t)s->code_ptr) >> 1;
> +        if (off == (int32_t)off) {
> +            tcg_out_insn(s, RIL, LARL, ret, off);
> +            return;
> +        }
> +    }

Is this part used in practice? There was such a trick on the ARM
backend, but it was actually never used.

> +
> +    /* If extended immediates are not present, then we may have to issue
> +       several instructions to load the low 32 bits.  */
> +    if (!(facilities & FACILITY_EXT_IMM)) {
> +        /* A 32-bit unsigned value can be loaded in 2 insns.  And given
> +           that the lli_insns loop above did not succeed, we know that
> +           both insns are required.  */
> +        if (uval <= 0xffffffff) {
> +            tcg_out_insn(s, RI, LLILL, ret, uval);
> +            tcg_out_insn(s, RI, IILH, ret, uval >> 16);
> +            return;
> +        }
> +
> +        /* If all high bits are set, the value can be loaded in 2 or 3 insns.
> +           We first want to make sure that all the high bits get set.  With
> +           luck the low 16-bits can be considered negative to perform that for
> +           free, otherwise we load an explicit -1.  */
> +        if (sval >> 32 == -1) {
> +            if (uval & 0x8000) {
> +                tcg_out_insn(s, RI, LGHI, ret, uval);
> +            } else {
> +                tcg_out_insn(s, RI, LGHI, ret, -1);
> +                tcg_out_insn(s, RI, IILL, ret, uval);
> +            }
> +            tcg_out_insn(s, RI, IILH, ret, uval >> 16);
> +            return;
> +        }
> +    }
> +
> +    /* If we get here, both the high and low parts have non-zero bits.  */
> +
> +    /* Recurse to load the lower 32-bits.  */
> +    tcg_out_movi(s, TCG_TYPE_I32, ret, sval);
> +
> +    /* Insert data into the high 32-bits.  */
> +    uval >>= 32;
> +    if (facilities & FACILITY_EXT_IMM) {
> +        if (uval < 0x10000) {
> +            tcg_out_insn(s, RI, IIHL, ret, uval);
> +        } else if ((uval & 0xffff) == 0) {
> +            tcg_out_insn(s, RI, IIHH, ret, uval >> 16);
> +        } else {
> +            tcg_out_insn(s, RIL, IIHF, ret, uval);
> +        }
>      } else {
> -        /* branch over constant and store its address in R13 */
> -        tcg_out_insn(s, RIL, BRASL, TCG_TMP0, (6 + 8) >> 1);
> -        /* 64-bit constant */
> -        tcg_out32(s, arg >> 32);
> -        tcg_out32(s, arg);
> -        /* load constant to ret */
> -        tcg_out_insn(s, RXY, LG, ret, TCG_TMP0, 0, 0);
> +        if (uval & 0xffff) {
> +            tcg_out_insn(s, RI, IIHL, ret, uval);
> +        }
> +        if (uval & 0xffff0000) {
> +            tcg_out_insn(s, RI, IIHH, ret, uval >> 16);
> +        }
>      }
>  }
>  
> -- 
> 1.7.0.1
> 
> 
>
Richard Henderson - June 13, 2010, 11:19 p.m.
On 06/12/2010 05:04 AM, Aurelien Jarno wrote:
>> +    for (i = 0; i < 4; i++) {
>> +        tcg_target_long mask = 0xffffull << i*16;
>> +        if ((uval & mask) != 0 && (uval & ~mask) == 0) {
> 
> Wouldn't it be simpler to use (uval & mask) == uval ?

Doh.

>> +    /* Try for PC-relative address load.  */
>> +    if ((sval & 1) == 0) {
>> +        intptr_t off = (sval - (intptr_t)s->code_ptr) >> 1;
>> +        if (off == (int32_t)off) {
>> +            tcg_out_insn(s, RIL, LARL, ret, off);
>> +            return;
>> +        }
>> +    }
> 
> Is this part used in practice? There was such a trick on the ARM
> backend, but it was actually never used.

Yes.  The difference here is that we have a +/- 4GB displacement.

This is primarily used when the extended-immediate facility is not present;
we can generate all even 32-bit constants from LARL, given the placement of
the code_gen_buffer.


r~

Patch

diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index d99bb5c..71e017a 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -52,12 +52,23 @@  typedef enum S390Opcode {
     RIL_BRASL   = 0xc005,
     RIL_BRCL    = 0xc004,
     RIL_LARL    = 0xc000,
+    RIL_IIHF    = 0xc008,
+    RIL_IILF    = 0xc009,
+    RIL_LGFI    = 0xc001,
+    RIL_LLIHF   = 0xc00e,
+    RIL_LLILF   = 0xc00f,
 
     RI_AGHI     = 0xa70b,
     RI_AHI      = 0xa70a,
     RI_BRC      = 0xa704,
+    RI_IIHH     = 0xa500,
+    RI_IIHL     = 0xa501,
     RI_IILH     = 0xa502,
+    RI_IILL     = 0xa503,
     RI_LGHI     = 0xa709,
+    RI_LLIHH    = 0xa50c,
+    RI_LLIHL    = 0xa50d,
+    RI_LLILH    = 0xa50e,
     RI_LLILL    = 0xa50f,
 
     RRE_AGR     = 0xb908,
@@ -382,24 +393,110 @@  static inline void tcg_out_mov(TCGContext *s, int ret, int arg)
 }
 
 /* load a register with an immediate value */
-static inline void tcg_out_movi(TCGContext *s, TCGType type,
-                int ret, tcg_target_long arg)
+static void tcg_out_movi(TCGContext *s, TCGType type,
+                         TCGReg ret, tcg_target_long sval)
 {
-    if (arg >= -0x8000 && arg < 0x8000) { /* signed immediate load */
-        tcg_out_insn(s, RI, LGHI, ret, arg);
-    } else if (!(arg & 0xffffffffffff0000UL)) {
-        tcg_out_insn(s, RI, LLILL, ret, arg);
-    } else if (!(arg & 0xffffffff00000000UL) || type == TCG_TYPE_I32) {
-        tcg_out_insn(s, RI, LLILL, ret, arg);
-        tcg_out_insn(s, RI, IILH, ret, arg >> 16);
+    static const S390Opcode lli_insns[4] = {
+        RI_LLILL, RI_LLILH, RI_LLIHL, RI_LLIHH
+    };
+
+    tcg_target_ulong uval = sval;
+    int i;
+
+    if (type == TCG_TYPE_I32) {
+        uval = (uint32_t)sval;
+        sval = (int32_t)sval;
+    }
+
+    /* Try all 32-bit insns that can load it in one go.  */
+    if (sval >= -0x8000 && sval < 0x8000) {
+        tcg_out_insn(s, RI, LGHI, ret, sval);
+        return;
+    }
+
+    for (i = 0; i < 4; i++) {
+        tcg_target_long mask = 0xffffull << i*16;
+        if ((uval & mask) != 0 && (uval & ~mask) == 0) {
+            tcg_out_insn_RI(s, lli_insns[i], ret, uval >> i*16);
+            return;
+        }
+    }
+
+    /* Try all 48-bit insns that can load it in one go.  */
+    if (facilities & FACILITY_EXT_IMM) {
+        if (sval == (int32_t)sval) {
+            tcg_out_insn(s, RIL, LGFI, ret, sval);
+            return;
+        }
+        if (uval <= 0xffffffff) {
+            tcg_out_insn(s, RIL, LLILF, ret, uval);
+            return;
+        }
+        if ((uval & 0xffffffff) == 0) {
+            tcg_out_insn(s, RIL, LLIHF, ret, uval >> 32);
+            return;
+        }
+    }
+
+    /* Try for PC-relative address load.  */
+    if ((sval & 1) == 0) {
+        intptr_t off = (sval - (intptr_t)s->code_ptr) >> 1;
+        if (off == (int32_t)off) {
+            tcg_out_insn(s, RIL, LARL, ret, off);
+            return;
+        }
+    }
+
+    /* If extended immediates are not present, then we may have to issue
+       several instructions to load the low 32 bits.  */
+    if (!(facilities & FACILITY_EXT_IMM)) {
+        /* A 32-bit unsigned value can be loaded in 2 insns.  And given
+           that the lli_insns loop above did not succeed, we know that
+           both insns are required.  */
+        if (uval <= 0xffffffff) {
+            tcg_out_insn(s, RI, LLILL, ret, uval);
+            tcg_out_insn(s, RI, IILH, ret, uval >> 16);
+            return;
+        }
+
+        /* If all high bits are set, the value can be loaded in 2 or 3 insns.
+           We first want to make sure that all the high bits get set.  With
+           luck the low 16-bits can be considered negative to perform that for
+           free, otherwise we load an explicit -1.  */
+        if (sval >> 32 == -1) {
+            if (uval & 0x8000) {
+                tcg_out_insn(s, RI, LGHI, ret, uval);
+            } else {
+                tcg_out_insn(s, RI, LGHI, ret, -1);
+                tcg_out_insn(s, RI, IILL, ret, uval);
+            }
+            tcg_out_insn(s, RI, IILH, ret, uval >> 16);
+            return;
+        }
+    }
+
+    /* If we get here, both the high and low parts have non-zero bits.  */
+
+    /* Recurse to load the lower 32-bits.  */
+    tcg_out_movi(s, TCG_TYPE_I32, ret, sval);
+
+    /* Insert data into the high 32-bits.  */
+    uval >>= 32;
+    if (facilities & FACILITY_EXT_IMM) {
+        if (uval < 0x10000) {
+            tcg_out_insn(s, RI, IIHL, ret, uval);
+        } else if ((uval & 0xffff) == 0) {
+            tcg_out_insn(s, RI, IIHH, ret, uval >> 16);
+        } else {
+            tcg_out_insn(s, RIL, IIHF, ret, uval);
+        }
     } else {
-        /* branch over constant and store its address in R13 */
-        tcg_out_insn(s, RIL, BRASL, TCG_TMP0, (6 + 8) >> 1);
-        /* 64-bit constant */
-        tcg_out32(s, arg >> 32);
-        tcg_out32(s, arg);
-        /* load constant to ret */
-        tcg_out_insn(s, RXY, LG, ret, TCG_TMP0, 0, 0);
+        if (uval & 0xffff) {
+            tcg_out_insn(s, RI, IIHL, ret, uval);
+        }
+        if (uval & 0xffff0000) {
+            tcg_out_insn(s, RI, IIHH, ret, uval >> 16);
+        }
     }
 }