@@ -36,6 +36,7 @@
#define TCG_CT_CONST_32 0x100
#define TCG_CT_CONST_NEG 0x200
#define TCG_CT_CONST_ADDI 0x400
+#define TCG_CT_CONST_ANDI 0x800
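+/* TCG_CT_CONST_ANDI: the constant is directly usable as an AND immediate;
+ selected by the 'A' constraint letter and tested by tcg_match_andi(). */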
/* Several places within the instruction set 0 means "no register"
rather than TCG_REG_R0. */
@@ -61,6 +62,8 @@ typedef enum S390Opcode {
RIL_LGFI = 0xc001,
RIL_LLIHF = 0xc00e,
RIL_LLILF = 0xc00f,
+ RIL_NIHF = 0xc00a,
+ RIL_NILF = 0xc00b,
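+ /* NIHF/NILF are part of the extended-immediate facility. */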
RI_AGHI = 0xa70b,
RI_AHI = 0xa70a,
@@ -74,6 +77,10 @@ typedef enum S390Opcode {
RI_LLIHL = 0xa50d,
RI_LLILH = 0xa50e,
RI_LLILL = 0xa50f,
+ RI_NIHH = 0xa504,
+ RI_NIHL = 0xa505,
+ RI_NILH = 0xa506,
+ RI_NILL = 0xa507,
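+ /* Each NI[HL][HL] ANDs a 16-bit immediate into one halfword of the
+ register, from NIHH (bits 0-15) down to NILL (bits 48-63). */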
RRE_AGR = 0xb908,
RRE_CGR = 0xb920,
@@ -319,6 +326,10 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
ct->ct &= ~TCG_CT_REG;
ct->ct |= TCG_CT_CONST_ADDI;
break;
+ case 'A':
+ ct->ct &= ~TCG_CT_REG;
+ ct->ct |= TCG_CT_CONST_ANDI;
+ break;
default:
break;
}
@@ -328,9 +339,66 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
return 0;
}
+/* Immediates to be used with logical AND. This is an optimization only,
+ since a full 64-bit immediate AND can always be performed with 4 sequential
+ NI[LH][LH] instructions. What we're looking for are immediates that can
+ be loaded efficiently, for which the immediate load plus the reg-reg AND
+ is smaller than the sequential NIs; those are rejected here (return 0) so
+ that the constant is loaded into a register instead. */
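+/* For example, 0xffffffff0000ffff matches (a single NILH performs the
+ AND), while 0x000000000000ff00 does not: any NI/NIF sequence for it is
+ longer than loading the value with LLILL and using NGR. */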
+
+static int tcg_match_andi(int ct, tcg_target_ulong val)
+{
+ int i;
+
+ if (facilities & FACILITY_EXT_IMM) {
+ if (ct & TCG_CT_CONST_32) {
+ /* All 32-bit ANDs can be performed with 1 48-bit insn. */
+ return 1;
+ }
+
+ /* Zero-extensions. */
+ if (val == 0xff || val == 0xffff || val == 0xffffffff) {
+ return 1;
+ }
+ } else {
+ if (ct & TCG_CT_CONST_32) {
+ val = (uint32_t)val;
+ } else if (val == 0xffffffff) {
+ return 1;
+ }
+ }
+
+ /* Try all 32-bit insns that can perform it in one go. */
+ for (i = 0; i < 4; i++) {
+ tcg_target_ulong mask = ~(0xffffull << i*16);
+ if ((val & mask) == mask) {
+ return 1;
+ }
+ }
+
+ /* Look for 16-bit values that perform the mask. These are better
+ loaded with LLI[LH][LH]. */
+ for (i = 0; i < 4; i++) {
+ tcg_target_ulong mask = 0xffffull << i*16;
+ if ((val & mask) == val) {
+ return 0;
+ }
+ }
+
+ /* Look for 32-bit values that perform the 64-bit mask. These are
+ better loaded with LLI[LH]F or, if the extended-immediate facility
+ is not available, with a pair of LLI insns. */
+ if ((ct & TCG_CT_CONST_32) == 0) {
+ if (val <= 0xffffffff || (val & 0xffffffff) == 0) {
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
/* Test if a constant matches the constraint. */
-static inline int tcg_target_const_match(tcg_target_long val,
- const TCGArgConstraint *arg_ct)
+static int tcg_target_const_match(tcg_target_long val,
+ const TCGArgConstraint *arg_ct)
{
int ct = arg_ct->ct;
@@ -357,6 +425,8 @@ static inline int tcg_target_const_match(tcg_target_long val,
} else {
return val == (int16_t)val;
}
+ } else if (ct & TCG_CT_CONST_ANDI) {
+ return tcg_match_andi(ct, val);
}
return 0;
@@ -703,6 +773,74 @@ static void tgen64_addi(TCGContext *s, TCGReg dest, int64_t val)
}
+static void tgen64_andi(TCGContext *s, TCGReg dest, tcg_target_ulong val)
+{
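+ /* Opcode tables for the AND immediates, indexed by operand field
+ position, least significant first. */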
+ static const S390Opcode ni_insns[4] = {
+ RI_NILL, RI_NILH, RI_NIHL, RI_NIHH
+ };
+ static const S390Opcode nif_insns[2] = {
+ RIL_NILF, RIL_NIHF
+ };
+
+ int i;
+
+ /* Look for no-op. */
+ if (val == -1) {
+ return;
+ }
+
+ /* Look for the zero-extensions. */
+ if (val == 0xffffffff) {
+ tgen_ext32u(s, dest, dest);
+ return;
+ }
+
+ if (facilities & FACILITY_EXT_IMM) {
+ if (val == 0xff) {
+ tgen_ext8u(s, TCG_TYPE_I64, dest, dest);
+ return;
+ }
+ if (val == 0xffff) {
+ tgen_ext16u(s, TCG_TYPE_I64, dest, dest);
+ return;
+ }
+
+ /* Try all 32-bit insns that can perform it in one go. */
+ for (i = 0; i < 4; i++) {
+ tcg_target_ulong mask = ~(0xffffull << i*16);
+ if ((val & mask) == mask) {
+ tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
+ return;
+ }
+ }
+
+ /* Try all 48-bit insns that can perform it in one go. We are
+ already inside the FACILITY_EXT_IMM block, so no further check
+ is needed. */
+ for (i = 0; i < 2; i++) {
+ tcg_target_ulong mask = ~(0xffffffffull << i*32);
+ if ((val & mask) == mask) {
+ tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32);
+ return;
+ }
+ }
+
+ /* Perform the AND via sequential modifications to the high and low
+ parts. Do this via recursion to handle 16-bit vs 32-bit masks in
+ each half. */
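+ /* E.g. val == 0x00ffff000000ffff emits NILH 0x0000 for the low half
+ and then NIHF 0x00ffff00 for the high half. */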
+ tgen64_andi(s, dest, val | 0xffffffff00000000ull);
+ tgen64_andi(s, dest, val | 0x00000000ffffffffull);
+ } else {
+ /* With no extended-immediate facility, just emit the sequence. */
+ for (i = 0; i < 4; i++) {
+ tcg_target_ulong mask = 0xffffull << i*16;
+ if ((val & mask) != mask) {
+ tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
+ }
+ }
+ }
+}
+
static void tgen32_cmp(TCGContext *s, TCGCond c, TCGReg r1, TCGReg r2)
{
if (c > TCG_COND_GT) {
@@ -776,6 +914,16 @@ static void tgen_calli(TCGContext *s, tcg_target_long dest)
}
#if defined(CONFIG_SOFTMMU)
+static void tgen64_andi_tmp(TCGContext *s, TCGReg dest, tcg_target_ulong val)
+{
+ /* Use the immediate sequence when the constant matches the ANDI
+ constraint; otherwise load it into TCG_TMP0 and use a reg-reg AND. */
+ if (tcg_match_andi(0, val)) {
+ tgen64_andi(s, dest, val);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, val);
+ tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0);
+ }
+}
+
static void tcg_prepare_qemu_ldst(TCGContext* s, int data_reg, int addr_reg,
int mem_index, int opc,
uint16_t **label2_ptr_p, int is_store)
@@ -803,13 +951,8 @@ static void tcg_prepare_qemu_ldst(TCGContext* s, int data_reg, int addr_reg,
tcg_out_sh64(s, RSY_SRLG, arg1, addr_reg, TCG_REG_NONE,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0,
- TARGET_PAGE_MASK | ((1 << s_bits) - 1));
- tcg_out_insn(s, RRE, NGR, arg0, TCG_TMP0);
-
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0,
- (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
- tcg_out_insn(s, RRE, NGR, arg1, TCG_TMP0);
+ tgen64_andi_tmp(s, arg0, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
+ tgen64_andi_tmp(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
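+ /* With 4K pages the page mask (e.g. 0xfffffffffffff007 for an 8-byte
+ access) folds to a single NILL; the TLB index mask is a small 16-bit
+ value that tcg_match_andi rejects, so it goes via a load plus NGR. */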
if (is_store) {
tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0,
@@ -1178,7 +1321,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_and_i32:
- tcg_out_insn(s, RR, NR, args[0], args[2]);
+ if (const_args[2]) {
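+ /* Force the high 32 bits of the mask to ones so that tgen64_andi
+ leaves the high half of the register untouched. */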
+ tgen64_andi(s, args[0], args[2] | 0xffffffff00000000ull);
+ } else {
+ tcg_out_insn(s, RR, NR, args[0], args[2]);
+ }
break;
case INDEX_op_or_i32:
tcg_out_insn(s, RR, OR, args[0], args[2]);
@@ -1188,7 +1335,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_and_i64:
- tcg_out_insn(s, RRE, NGR, args[0], args[2]);
+ if (const_args[2]) {
+ tgen64_andi(s, args[0], args[2]);
+ } else {
+ tcg_out_insn(s, RRE, NGR, args[0], args[2]);
+ }
break;
case INDEX_op_or_i64:
tcg_out_insn(s, RRE, OGR, args[0], args[2]);
@@ -1454,9 +1605,10 @@ static const TCGTargetOpDef s390_op_defs[] = {
{ INDEX_op_div2_i32, { "b", "a", "0", "1", "r" } },
{ INDEX_op_divu2_i32, { "b", "a", "0", "1", "r" } },
- { INDEX_op_and_i32, { "r", "0", "r" } },
+ { INDEX_op_and_i32, { "r", "0", "rWA" } },
{ INDEX_op_or_i32, { "r", "0", "r" } },
{ INDEX_op_xor_i32, { "r", "0", "r" } },
+
{ INDEX_op_neg_i32, { "r", "r" } },
{ INDEX_op_shl_i32, { "r", "0", "Ri" } },
@@ -1515,9 +1667,10 @@ static const TCGTargetOpDef s390_op_defs[] = {
{ INDEX_op_div2_i64, { "b", "a", "0", "1", "r" } },
{ INDEX_op_divu2_i64, { "b", "a", "0", "1", "r" } },
- { INDEX_op_and_i64, { "r", "0", "r" } },
+ { INDEX_op_and_i64, { "r", "0", "rA" } },
{ INDEX_op_or_i64, { "r", "0", "r" } },
{ INDEX_op_xor_i64, { "r", "0", "r" } },
+
{ INDEX_op_neg_i64, { "r", "r" } },
{ INDEX_op_shl_i64, { "r", "r", "Ri" } },
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/s390/tcg-target.c |  179 +++++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 166 insertions(+), 13 deletions(-)