[51/62] tcg-s390: Conditionalize AND IMMEDIATE instructions.

Submitter Richard Henderson
Date May 27, 2010, 8:46 p.m.
Message ID <1274993204-30766-52-git-send-email-rth@twiddle.net>
Permalink /patch/53830/
State New

The 32-bit immediate AND instructions are part of the extended-immediate
facility.  Use them only when that facility is present.

At the same time, move the choice between an inline immediate and a
register-loaded constant behind a new TCG constraint letter.
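
For intuition, the size trade-off the new constraint weighs (see the
comment above tcg_match_andi in the patch below) can be sketched as a
standalone C program.  This is illustrative only -- ni_insn_count is a
made-up name, not backend code:

    #include <stdio.h>

    /* Count the NI[LH][LH] insns the sequential fallback in tgen64_andi
       would emit: one per 16-bit slice of VAL that is not all-ones.  */
    static int ni_insn_count(unsigned long long val)
    {
        int i, n = 0;

        for (i = 0; i < 4; i++) {
            unsigned long long mask = 0xffffULL << i * 16;
            if ((val & mask) != mask) {
                n++;
            }
        }
        return n;
    }

    int main(void)
    {
        /* Only value bits 16..31 need clearing: a single NILH, which
           no load-plus-NGR pair can beat, so the constant matches.  */
        printf("%d\n", ni_insn_count(0xffffffff0000ffffULL));   /* 1 */

        /* A shifted 16-bit mask: three NIs inline, versus two insns
           (LLILH + NGR) via a temporary, so tcg_match_andi returns 0
           and the constant is forced into a register instead.  */
        printf("%d\n", ni_insn_count(0x00000000ffff0000ULL));   /* 3 */
        return 0;
    }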

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/s390/tcg-target.c |  209 ++++++++++++++++++++++++++++--------------------
 1 files changed, 122 insertions(+), 87 deletions(-)

Patch

diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index 8a7c9ae..359f6d1 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -33,10 +33,11 @@ 
     do { } while (0)
 #endif
 
-#define TCG_CT_CONST_32    0x100
-#define TCG_CT_CONST_NEG   0x200
-#define TCG_CT_CONST_ADDI  0x400
-#define TCG_CT_CONST_MULI  0x800
+#define TCG_CT_CONST_32    0x0100
+#define TCG_CT_CONST_NEG   0x0200
+#define TCG_CT_CONST_ADDI  0x0400
+#define TCG_CT_CONST_MULI  0x0800
+#define TCG_CT_CONST_ANDI  0x1000
 
 #define TCG_TMP0           TCG_REG_R14
 
@@ -353,6 +354,10 @@  static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
         ct->ct &= ~TCG_CT_REG;
         ct->ct |= TCG_CT_CONST_MULI;
         break;
+    case 'A':
+        ct->ct &= ~TCG_CT_REG;
+        ct->ct |= TCG_CT_CONST_ANDI;
+        break;
     default:
         break;
     }
@@ -362,9 +367,66 @@  static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
     return 0;
 }
 
+/* Immediates to be used with logical AND.  This is an optimization only,
+   since a full 64-bit immediate AND can always be performed with 4 sequential
+   NI[LH][LH] instructions.  What we're looking for is immediates that we
+   can load efficiently, and the immediate load plus the reg-reg AND is
+   smaller than the sequential NI's.  */
+
+static int tcg_match_andi(int ct, tcg_target_ulong val)
+{
+    int i;
+
+    if (facilities & FACILITY_EXT_IMM) {
+        if (ct & TCG_CT_CONST_32) {
+            /* All 32-bit ANDs can be performed with 1 48-bit insn.  */
+            return 1;
+        }
+
+        /* Zero-extensions.  */
+        if (val == 0xff || val == 0xffff || val == 0xffffffff) {
+            return 1;
+        }
+    } else {
+        if (ct & TCG_CT_CONST_32) {
+            val = (uint32_t)val;
+        } else if (val == 0xffffffff) {
+            return 1;
+        }
+    }
+
+    /* Try all 32-bit insns that can perform it in one go.  */
+    for (i = 0; i < 4; i++) {
+        tcg_target_ulong mask = ~(0xffffull << i*16);
+        if ((val & mask) == mask) {
+            return 1;
+        }
+    }
+
+    /* Look for 16-bit values performing the mask.  These are better
+       to load with LLI[LH][LH].  */
+    for (i = 0; i < 4; i++) {
+        tcg_target_ulong mask = 0xffffull << i*16;
+        if ((val & mask) == val) {
+            return 0;
+        }
+    }
+
+    /* Look for 32-bit values performing the 64-bit mask.  These
+       are better to load with LLI[LH]F, or if extended immediates
+       not available, with a pair of LLI insns.  */
+    if ((ct & TCG_CT_CONST_32) == 0) {
+        if (val <= 0xffffffff || (val & 0xffffffff) == 0) {
+            return 0;
+        }
+    }
+
+    return 1;
+}
+
 /* Test if a constant matches the constraint. */
-static inline int tcg_target_const_match(tcg_target_long val,
-                                         const TCGArgConstraint *arg_ct)
+static int tcg_target_const_match(tcg_target_long val,
+                                  const TCGArgConstraint *arg_ct)
 {
     int ct = arg_ct->ct;
 
@@ -401,6 +463,8 @@  static inline int tcg_target_const_match(tcg_target_long val,
         } else {
             return val == (int16_t)val;
         }
+    } else if (ct & TCG_CT_CONST_ANDI) {
+        return tcg_match_andi(ct, val);
     }
 
     return 0;
@@ -764,37 +828,6 @@  static void tgen64_addi(TCGContext *s, TCGReg dest, int64_t val)
 
 }
 
-static void tgen32_andi(TCGContext *s, TCGReg dest, uint32_t val)
-{
-    /* Zero-th, look for no-op.  */
-    if (val == -1) {
-        return;
-    }
-
-    /* First, look for the zero-extensions.  */
-    if (val == 0xff) {
-        tgen_ext8u(s, dest, dest);
-        return;
-    }
-    if (val == 0xffff) {
-        tgen_ext16u(s, dest, dest);
-        return;
-    }
-
-    /* Second, try all 32-bit insns that can perform it in one go.  */
-    if ((val & 0xffff0000) == 0xffff0000) {
-        tcg_out_insn(s, RI, NILL, dest, val);
-        return;
-    }
-    if ((val & 0x0000ffff) == 0x0000ffff) {
-        tcg_out_insn(s, RI, NILH, dest, val >> 16);
-        return;
-    }
-
-    /* Lastly, perform the entire operation with a 48-bit insn.  */
-    tcg_out_insn(s, RIL, NILF, dest, val);
-}
-
 static void tgen64_andi(TCGContext *s, TCGReg dest, tcg_target_ulong val)
 {
     static const S390Opcode ni_insns[4] = {
@@ -806,69 +839,61 @@  static void tgen64_andi(TCGContext *s, TCGReg dest, tcg_target_ulong val)
 
     int i;
 
-    /* Zero-th, look for no-op.  */
+    /* Look for no-op.  */
     if (val == -1) {
         return;
     }
 
-    /* First, look for the zero-extensions.  */
-    if (val == 0xff) {
-        tgen_ext8u(s, dest, dest);
-        return;
-    }
-    if (val == 0xffff) {
-        tgen_ext16u(s, dest, dest);
-        return;
-    }
+    /* Look for the zero-extensions.  */
     if (val == 0xffffffff) {
         tgen_ext32u(s, dest, dest);
         return;
     }
 
-    /* Second, try all 32-bit insns that can perform it in one go.  */
-    for (i = 0; i < 4; i++) {
-        tcg_target_ulong mask = ~(0xffffull << i*16);
-        if ((val & mask) == mask) {
-            tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
+    if (facilities & FACILITY_EXT_IMM) {
+        if (val == 0xff) {
+            tgen_ext8u(s, TCG_TYPE_I64, dest, dest);
             return;
         }
-    }
-
-    /* Third, try all 48-bit insns that can perform it in one go.  */
-    for (i = 0; i < 2; i++) {
-        tcg_target_ulong mask = ~(0xffffffffull << i*32);
-        if ((val & mask) == mask) {
-            tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32);
+        if (val == 0xffff) {
+            tgen_ext16u(s, TCG_TYPE_I64, dest, dest);
             return;
         }
-    }
 
-    /* Fourth, look for masks that can be loaded with one instruction
-       into a register.  This is slightly smaller than using two 48-bit
-       masks, as below.  */
-    for (i = 0; i < 4; i++) {
-        tcg_target_ulong mask = ~(0xffffull << i*16);
-        if ((val & mask) == 0) {
-            tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, val);
-            tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0);
-            return;
+        /* Try all 32-bit insns that can perform it in one go.  */
+        for (i = 0; i < 4; i++) {
+            tcg_target_ulong mask = ~(0xffffull << i*16);
+            if ((val & mask) == mask) {
+                tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
+                return;
+            }
         }
-    }
 
-    for (i = 0; i < 2; i++) {
-        tcg_target_ulong mask = ~(0xffffffffull << i*32);
-        if ((val & mask) == 0) {
-            tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, val);
-            tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0);
-            return;
+        /* Try all 48-bit insns that can perform it in one go.  */
+        if (facilities & FACILITY_EXT_IMM) {
+            for (i = 0; i < 2; i++) {
+                tcg_target_ulong mask = ~(0xffffffffull << i*32);
+                if ((val & mask) == mask) {
+                    tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32);
+                    return;
+                }
+            }
         }
-    }
 
-    /* Last, perform the AND via sequential modifications to the
-       high and low parts.  Do this via recursion to handle 16-bit
-       vs 32-bit masks in each half.  */
-    tgen64_andi(s, dest, val | 0xffffffff00000000ull);
-    tgen64_andi(s, dest, val | 0x00000000ffffffffull);
+        /* Perform the AND via sequential modifications to the high and low
+           parts.  Do this via recursion to handle 16-bit vs 32-bit masks in
+           each half.  */
+        tgen64_andi(s, dest, val | 0xffffffff00000000ull);
+        tgen64_andi(s, dest, val | 0x00000000ffffffffull);
+    } else {
+        /* With no extended-immediate facility, just emit the sequence.  */
+        for (i = 0; i < 4; i++) {
+            tcg_target_ulong mask = 0xffffull << i*16;
+            if ((val & mask) != mask) {
+                tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
+            }
+        }
+    }
 }
 
 static void tgen64_ori(TCGContext *s, TCGReg dest, tcg_target_ulong val)
@@ -1121,6 +1146,16 @@  static void tcg_out_qemu_st_direct(TCGContext *s, int opc, TCGReg data,
 }
 
 #if defined(CONFIG_SOFTMMU)
+static void tgen64_andi_tmp(TCGContext *s, TCGReg dest, tcg_target_ulong val)
+{
+    if (tcg_match_andi(0, val)) {
+        tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, val);
+        tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0);
+    } else {
+        tgen64_andi(s, dest, val);
+    }
+}
+
 static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
                                   TCGReg addr_reg, int mem_index, int opc,
                                   uint16_t **label2_ptr_p, int is_store)
@@ -1140,8 +1175,8 @@  static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
     tcg_out_sh64(s, RSY_SRLG, arg1, addr_reg, SH64_REG_NONE,
                  TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
 
-    tgen64_andi(s, arg0, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
-    tgen64_andi(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
+    tgen64_andi_tmp(s, arg0, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
+    tgen64_andi_tmp(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
 
     if (is_store) {
         ofs = offsetof(CPUState, tlb_table[mem_index][0].addr_write);
@@ -1413,7 +1448,7 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
 
     case INDEX_op_and_i32:
         if (const_args[2]) {
-            tgen32_andi(s, args[0], args[2]);
+            tgen64_andi(s, args[0], args[2] | 0xffffffff00000000ull);
         } else {
             tcg_out_insn(s, RR, NR, args[0], args[2]);
         }
@@ -1728,7 +1763,7 @@  static const TCGTargetOpDef s390_op_defs[] = {
     { INDEX_op_div2_i32, { "b", "a", "0", "1", "r" } },
     { INDEX_op_divu2_i32, { "b", "a", "0", "1", "r" } },
 
-    { INDEX_op_and_i32, { "r", "0", "ri" } },
+    { INDEX_op_and_i32, { "r", "0", "rWA" } },
     { INDEX_op_or_i32, { "r", "0", "ri" } },
     { INDEX_op_xor_i32, { "r", "0", "ri" } },
     { INDEX_op_neg_i32, { "r", "r" } },
@@ -1789,7 +1824,7 @@  static const TCGTargetOpDef s390_op_defs[] = {
     { INDEX_op_div2_i64, { "b", "a", "0", "1", "r" } },
     { INDEX_op_divu2_i64, { "b", "a", "0", "1", "r" } },
 
-    { INDEX_op_and_i64, { "r", "0", "ri" } },
+    { INDEX_op_and_i64, { "r", "0", "rA" } },
     { INDEX_op_or_i64, { "r", "0", "ri" } },
     { INDEX_op_xor_i64, { "r", "0", "ri" } },
     { INDEX_op_neg_i64, { "r", "r" } },
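
As a postscript for reviewers (not part of the patch): the rewritten
tgen64_andi can be traced on the extended-immediate path with a small
host-side sketch.  Printing stands in for tcg_out_insn, "%rD" for the
destination register, and the zero-extension mnemonics (llgcr, llghr,
llgfr) are assumptions about what tgen_ext{8,16,32}u expand to:

    #include <stdio.h>

    /* Illustrative trace of tgen64_andi with FACILITY_EXT_IMM set.  */
    static void trace_andi64(unsigned long long val)
    {
        static const char *ni[4]  = { "nill", "nilh", "nihl", "nihh" };
        static const char *nif[2] = { "nilf", "nihf" };
        int i;

        if (val == ~0ULL) {
            return;                            /* no-op */
        }
        if (val == 0xffffffffULL) {
            puts("llgfr %rD,%rD");             /* tgen_ext32u */
            return;
        }
        if (val == 0xffULL) {
            puts("llgcr %rD,%rD");             /* tgen_ext8u */
            return;
        }
        if (val == 0xffffULL) {
            puts("llghr %rD,%rD");             /* tgen_ext16u */
            return;
        }
        /* One 16-bit slice to clear: a single 32-bit NI insn.  */
        for (i = 0; i < 4; i++) {
            unsigned long long mask = ~(0xffffULL << i * 16);
            if ((val & mask) == mask) {
                printf("%s %%rD,0x%llx\n", ni[i],
                       (val >> i * 16) & 0xffff);
                return;
            }
        }
        /* One 32-bit half to clear: a single 48-bit NIxF insn.  */
        for (i = 0; i < 2; i++) {
            unsigned long long mask = ~(0xffffffffULL << i * 32);
            if ((val & mask) == mask) {
                printf("%s %%rD,0x%llx\n", nif[i],
                       (val >> i * 32) & 0xffffffff);
                return;
            }
        }
        /* Both halves partial: fix each half in turn, as the patch
           does via recursion.  */
        trace_andi64(val | 0xffffffff00000000ULL);
        trace_andi64(val | 0x00000000ffffffffULL);
    }

    int main(void)
    {
        trace_andi64(0x0f0f0f0f0f0f0f0fULL);   /* nilf + nihf */
        return 0;
    }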