Patchwork [32/62] tcg-s390: Implement immediate ANDs.

login
register
mail settings
Submitter Richard Henderson
Date May 27, 2010, 8:46 p.m.
Message ID <1274993204-30766-33-git-send-email-rth@twiddle.net>
Download mbox | patch
Permalink /patch/53808/
State New
Headers show

Comments

Richard Henderson - May 27, 2010, 8:46 p.m.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/s390/tcg-target.c |  138 +++++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 127 insertions(+), 11 deletions(-)

Patch

diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index caa2d0d..2fd58bd 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -51,6 +51,8 @@  typedef enum S390Opcode {
     RIL_LGFI    = 0xc001,
     RIL_LLIHF   = 0xc00e,
     RIL_LLILF   = 0xc00f,
+    RIL_NIHF    = 0xc00a,
+    RIL_NILF    = 0xc00b,
 
     RI_AGHI     = 0xa70b,
     RI_AHI      = 0xa70a,
@@ -64,6 +66,10 @@  typedef enum S390Opcode {
     RI_LLIHL    = 0xa50d,
     RI_LLILH    = 0xa50e,
     RI_LLILL    = 0xa50f,
+    RI_NIHH     = 0xa504,
+    RI_NIHL     = 0xa505,
+    RI_NILH     = 0xa506,
+    RI_NILL     = 0xa507,
 
     RRE_AGR     = 0xb908,
     RRE_CGR     = 0xb920,
@@ -555,6 +561,113 @@  static inline void tgen64_addi(TCGContext *s, TCGReg dest, tcg_target_long val)
     }
 }
 
+static void tgen32_andi(TCGContext *s, TCGReg dest, uint32_t val)
+{
+    /* Emit a 32-bit dest &= val.  Zero-th, an all-ones val is a no-op.  */
+    if (val == -1) {                    /* i.e. val == 0xffffffff */
+        return;
+    }
+
+    /* First, masks 0xff and 0xffff are plain zero-extensions of dest.  */
+    if (val == 0xff) {
+        tgen_ext8u(s, dest, dest);
+        return;
+    }
+    if (val == 0xffff) {
+        tgen_ext16u(s, dest, dest);
+        return;
+    }
+
+    /* Second, if one halfword of val is all ones, use one 32-bit insn.  */
+    if ((val & 0xffff0000) == 0xffff0000) {
+        tcg_out_insn(s, RI, NILL, dest, val);
+        return;
+    }
+    if ((val & 0x0000ffff) == 0x0000ffff) {
+        tcg_out_insn(s, RI, NILH, dest, val >> 16);
+        return;
+    }
+
+    /* Lastly, AND with the whole 32-bit val using one 48-bit insn.  */
+    tcg_out_insn(s, RIL, NILF, dest, val);
+}
+
+static void tgen64_andi(TCGContext *s, TCGReg dest, tcg_target_ulong val)
+{
+    static const S390Opcode ni_insns[4] = {
+        RI_NILL, RI_NILH, RI_NIHL, RI_NIHH    /* 16-bit chunk 0 (low) .. 3 */
+    };
+    static const S390Opcode nif_insns[2] = {
+        RIL_NILF, RIL_NIHF                    /* low half, high half */
+    };
+
+    int i;
+
+    /* Emit a 64-bit dest &= val.  Zero-th, an all-ones val is a no-op.  */
+    if (val == -1) {                    /* i.e. every bit of val is set */
+        return;
+    }
+
+    /* First, 0xff, 0xffff and 0xffffffff are plain zero-extensions.  */
+    if (val == 0xff) {
+        tgen_ext8u(s, dest, dest);
+        return;
+    }
+    if (val == 0xffff) {
+        tgen_ext16u(s, dest, dest);
+        return;
+    }
+    if (val == 0xffffffff) {
+        tgen_ext32u(s, dest, dest);
+        return;
+    }
+
+    /* Second, all ones outside one 16-bit chunk: use one 32-bit insn.  */
+    for (i = 0; i < 4; i++) {
+        tcg_target_ulong mask = ~(0xffffull << i*16);
+        if ((val & mask) == mask) {
+            tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
+            return;
+        }
+    }
+
+    /* Third, all ones outside one 32-bit half: use one 48-bit insn.  */
+    for (i = 0; i < 2; i++) {
+        tcg_target_ulong mask = ~(0xffffffffull << i*32);
+        if ((val & mask) == mask) {
+            tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32);
+            return;
+        }
+    }
+
+    /* Fourth, if val is zero outside one 16-bit chunk, load it into R13
+       (used here as a scratch register) and AND register-to-register.
+       This is slightly smaller than using two 48-bit masks, as below.  */
+    for (i = 0; i < 4; i++) {
+        tcg_target_ulong mask = ~(0xffffull << i*16);
+        if ((val & mask) == 0) {
+            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R13, val);
+            tcg_out_insn(s, RRE, NGR, dest, TCG_REG_R13);
+            return;
+        }
+    }
+    /* Same for one 32-bit half; this test also covers the loop above.  */
+    for (i = 0; i < 2; i++) {
+        tcg_target_ulong mask = ~(0xffffffffull << i*32);
+        if ((val & mask) == 0) {
+            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R13, val);
+            tcg_out_insn(s, RRE, NGR, dest, TCG_REG_R13);
+            return;
+        }
+    }
+
+    /* Last, AND the low half and then the high half separately.  Each
+       recursive call forces the other half of val to all ones, so it
+       matches the second or third step above and recurses no further.  */
+    tgen64_andi(s, dest, val | 0xffffffff00000000ull);
+    tgen64_andi(s, dest, val | 0x00000000ffffffffull);
+}
+
 static void tgen32_cmp(TCGContext *s, TCGCond c, TCGReg r1, TCGReg r2)
 {
     if (c > TCG_COND_GT) {
@@ -655,13 +768,8 @@  static void tcg_prepare_qemu_ldst(TCGContext* s, int data_reg, int addr_reg,
     tcg_out_sh64(s, RSY_SRLG, arg1, addr_reg, SH64_REG_NONE,
                  TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
 
-    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R13,
-                 TARGET_PAGE_MASK | ((1 << s_bits) - 1));
-    tcg_out_insn(s, RRE, NGR, arg0, TCG_REG_R13);
-
-    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R13,
-                 (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
-    tcg_out_insn(s, RRE, NGR, arg1, TCG_REG_R13);
+    tgen64_andi(s, arg0, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
+    tgen64_andi(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
 
     if (is_store) {
         tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R13,
@@ -1029,7 +1137,11 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_and_i32:
-        tcg_out_insn(s, RR, NR, args[0], args[2]);
+        if (const_args[2]) {
+            tgen32_andi(s, args[0], args[2]);
+        } else {
+            tcg_out_insn(s, RR, NR, args[0], args[2]);
+        }
         break;
     case INDEX_op_or_i32:
         tcg_out_insn(s, RR, OR, args[0], args[2]);
@@ -1039,7 +1151,11 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_and_i64:
-        tcg_out_insn(s, RRE, NGR, args[0], args[2]);
+        if (const_args[2]) {
+            tgen64_andi(s, args[0], args[2]);
+        } else {
+            tcg_out_insn(s, RRE, NGR, args[0], args[2]);
+        }
         break;
     case INDEX_op_or_i64:
         tcg_out_insn(s, RRE, OGR, args[0], args[2]);
@@ -1297,7 +1413,7 @@  static const TCGTargetOpDef s390_op_defs[] = {
     { INDEX_op_div2_i32, { "b", "a", "0", "1", "r" } },
     { INDEX_op_divu2_i32, { "b", "a", "0", "1", "r" } },
 
-    { INDEX_op_and_i32, { "r", "0", "r" } },
+    { INDEX_op_and_i32, { "r", "0", "ri" } },
     { INDEX_op_or_i32, { "r", "0", "r" } },
     { INDEX_op_xor_i32, { "r", "0", "r" } },
     { INDEX_op_neg_i32, { "r", "r" } },
@@ -1358,7 +1474,7 @@  static const TCGTargetOpDef s390_op_defs[] = {
     { INDEX_op_div2_i64, { "b", "a", "0", "1", "r" } },
     { INDEX_op_divu2_i64, { "b", "a", "0", "1", "r" } },
 
-    { INDEX_op_and_i64, { "r", "0", "r" } },
+    { INDEX_op_and_i64, { "r", "0", "ri" } },
     { INDEX_op_or_i64, { "r", "0", "r" } },
     { INDEX_op_xor_i64, { "r", "0", "r" } },
     { INDEX_op_neg_i64, { "r", "r" } },