diff mbox series

[committed] aarch64: Fix PR96998 and restore code quality in combine

Message ID 20201030092204.fd3vj5q4jggwunsw@arm.com
State New
Headers show
Series [committed] aarch64: Fix PR96998 and restore code quality in combine | expand

Commit Message

Alex Coplan Oct. 30, 2020, 9:22 a.m. UTC
This patch fixes a bug in the AArch64 backend. Currently, we accept an
odd sign_extract representation of addresses, but don't accept that same
odd form of address as an LEA.

This is the cause of PR96998. In the testcase given in the PR, combine
produces:

(insn 9 8 10 3 (set (mem:SI (plus:DI (sign_extract:DI (mult:DI (subreg:DI (reg/v:SI 92 [ g ]) 0)
                        (const_int 4 [0x4]))
                    (const_int 34 [0x22])
                    (const_int 0 [0]))
                (reg/f:DI 96)) [3 *i_5+0 S4 A32])
        (asm_operands:SI ("") ("=Q") 0 []
             []
             [] test.c:11)) "test.c":11:5 -1
     (expr_list:REG_DEAD (reg/v:SI 92 [ g ])
        (nil)))

Then LRA reloads the address and we ICE because we fail to recognize the
sign_extract outside the mem:

(insn 33 8 34 3 (set (reg:DI 100)
        (sign_extract:DI (ashift:DI (subreg:DI (reg/v:SI 92 [ g ]) 0)
                (const_int 2 [0x2]))
            (const_int 34 [0x22])
            (const_int 0 [0]))) "test.c":11:5 -1
     (nil))

The aarch64 changes here remove the support for this sign_extract
representation of addresses, fixing PR96998. Now this by itself would
regress code quality, so this change is paired with an improvement to
combine which prevents an extract rtx from being emitted in this case:
we now write the rtx above as a shift of an extend, which allows the
combination to go ahead.

Prior to this, combine.c:make_extraction() identified where we can emit
an ashift of an extend in place of an extraction, but failed to make the
corresponding canonicalization/simplification when presented with a mult
by a power of two. Such a representation is canonical when representing
a left-shifted address inside a mem.

This change remedies this situation. For rtxes such as:

(mult:DI (subreg:DI (reg:SI r) 0) (const_int 2^n))

where the bottom 32 + n bits are valid (the higher-order bits are
undefined) and make_extraction() is being asked to sign_extract the
lower (valid) bits, after the patch, we rewrite this as:

(mult:DI (sign_extend:DI (reg:SI r)) (const_int 2^n))

instead of using a sign_extract.

gcc/ChangeLog:

	PR target/96998
	* combine.c (make_extraction): Also handle shifts written as
	(mult x 2^n), avoid creating an extract rtx for these.
	* config/aarch64/aarch64.c (aarch64_is_extend_from_extract): Delete.
	(aarch64_classify_index): Remove extract-based address handling.
	(aarch64_strip_extend): Likewise.
	(aarch64_rtx_arith_op_extract_p): Likewise, remove now-unused parameter.
	Update callers...
	(aarch64_rtx_costs): ... here.

gcc/testsuite/ChangeLog:

	PR target/96998
	* gcc.c-torture/compile/pr96998.c: New test.

---

aarch64 change approved here:
https://gcc.gnu.org/pipermail/gcc-patches/2020-October/556267.html

combine change approved here:
https://gcc.gnu.org/pipermail/gcc-patches/2020-October/557387.html

Committing as one change to avoid regressing the trunk.

Testing:
 * Bootstrapped and regtested on aarch64-linux-gnu, arm-linux-gnueabihf,
   and x86_64-linux-gnu: no regressions.

Pushed to master.

Thanks,
Alex
diff mbox series

Patch

diff --git a/gcc/combine.c b/gcc/combine.c
index 4782e1d9dcc..ed1ad45de83 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -7665,6 +7665,24 @@  make_extraction (machine_mode mode, rtx inner, HOST_WIDE_INT pos,
       if (new_rtx != 0)
 	return gen_rtx_ASHIFT (mode, new_rtx, XEXP (inner, 1));
     }
+  else if (GET_CODE (inner) == MULT
+	   && CONST_INT_P (XEXP (inner, 1))
+	   && pos_rtx == 0 && pos == 0)
+    {
+      /* We're extracting the least significant bits of an rtx
+	 (mult X (const_int 2^C)), where LEN > C.  Extract the
+	 least significant (LEN - C) bits of X, giving an rtx
+	 whose mode is MODE, then multiply it by 2^C.  */
+      const HOST_WIDE_INT shift_amt = exact_log2 (INTVAL (XEXP (inner, 1)));
+      if (IN_RANGE (shift_amt, 1, len - 1))
+	{
+	  new_rtx = make_extraction (mode, XEXP (inner, 0),
+				     0, 0, len - shift_amt,
+				     unsignedp, in_dest, in_compare);
+	  if (new_rtx)
+	    return gen_rtx_MULT (mode, new_rtx, XEXP (inner, 1));
+	}
+    }
   else if (GET_CODE (inner) == TRUNCATE
 	   /* If trying or potentionally trying to extract
 	      bits outside of is_mode, don't look through
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 35d6f2e2f01..db991e59cbe 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2886,33 +2886,6 @@  aarch64_is_noplt_call_p (rtx sym)
   return false;
 }
 
-/* Return true if the offsets to a zero/sign-extract operation
-   represent an expression that matches an extend operation.  The
-   operands represent the parameters from
-
-   (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
-bool
-aarch64_is_extend_from_extract (scalar_int_mode mode, rtx mult_imm,
-				rtx extract_imm)
-{
-  HOST_WIDE_INT mult_val, extract_val;
-
-  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
-    return false;
-
-  mult_val = INTVAL (mult_imm);
-  extract_val = INTVAL (extract_imm);
-
-  if (extract_val > 8
-      && extract_val < GET_MODE_BITSIZE (mode)
-      && exact_log2 (extract_val & ~7) > 0
-      && (extract_val & 7) <= 4
-      && mult_val == (1 << (extract_val & 7)))
-    return true;
-
-  return false;
-}
-
 /* Emit an insn that's a simple single-set.  Both the operands must be
    known to be valid.  */
 inline static rtx_insn *
@@ -8936,22 +8909,6 @@  aarch64_classify_index (struct aarch64_address_info *info, rtx x,
       index = XEXP (XEXP (x, 0), 0);
       shift = INTVAL (XEXP (x, 1));
     }
-  /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
-  else if ((GET_CODE (x) == SIGN_EXTRACT
-	    || GET_CODE (x) == ZERO_EXTRACT)
-	   && GET_MODE (x) == DImode
-	   && GET_CODE (XEXP (x, 0)) == MULT
-	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
-	   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
-    {
-      type = (GET_CODE (x) == SIGN_EXTRACT)
-	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
-      index = XEXP (XEXP (x, 0), 0);
-      shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
-      if (INTVAL (XEXP (x, 1)) != 32 + shift
-	  || INTVAL (XEXP (x, 2)) != 0)
-	shift = -1;
-    }
   /* (and:DI (mult:DI (reg:DI) (const_int scale))
      (const_int 0xffffffff<<shift)) */
   else if (GET_CODE (x) == AND
@@ -8967,22 +8924,6 @@  aarch64_classify_index (struct aarch64_address_info *info, rtx x,
       if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
 	shift = -1;
     }
-  /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
-  else if ((GET_CODE (x) == SIGN_EXTRACT
-	    || GET_CODE (x) == ZERO_EXTRACT)
-	   && GET_MODE (x) == DImode
-	   && GET_CODE (XEXP (x, 0)) == ASHIFT
-	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
-	   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
-    {
-      type = (GET_CODE (x) == SIGN_EXTRACT)
-	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
-      index = XEXP (XEXP (x, 0), 0);
-      shift = INTVAL (XEXP (XEXP (x, 0), 1));
-      if (INTVAL (XEXP (x, 1)) != 32 + shift
-	  || INTVAL (XEXP (x, 2)) != 0)
-	shift = -1;
-    }
   /* (and:DI (ashift:DI (reg:DI) (const_int shift))
      (const_int 0xffffffff<<shift)) */
   else if (GET_CODE (x) == AND
@@ -11360,16 +11301,6 @@  aarch64_strip_extend (rtx x, bool strip_shift)
   if (!is_a <scalar_int_mode> (GET_MODE (op), &mode))
     return op;
 
-  /* Zero and sign extraction of a widened value.  */
-  if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
-      && XEXP (op, 2) == const0_rtx
-      && GET_CODE (XEXP (op, 0)) == MULT
-      && aarch64_is_extend_from_extract (mode, XEXP (XEXP (op, 0), 1),
-					 XEXP (op, 1)))
-    return XEXP (XEXP (op, 0), 0);
-
-  /* It can also be represented (for zero-extend) as an AND with an
-     immediate.  */
   if (GET_CODE (op) == AND
       && GET_CODE (XEXP (op, 0)) == MULT
       && CONST_INT_P (XEXP (XEXP (op, 0), 1))
@@ -11704,35 +11635,15 @@  aarch64_branch_cost (bool speed_p, bool predictable_p)
     return branch_costs->unpredictable;
 }
 
-/* Return true if the RTX X in mode MODE is a zero or sign extract
+/* Return true if X is a zero or sign extract
    usable in an ADD or SUB (extended register) instruction.  */
 static bool
-aarch64_rtx_arith_op_extract_p (rtx x, scalar_int_mode mode)
-{
-  /* Catch add with a sign extract.
-     This is add_<optab><mode>_multp2.  */
-  if (GET_CODE (x) == SIGN_EXTRACT
-      || GET_CODE (x) == ZERO_EXTRACT)
-    {
-      rtx op0 = XEXP (x, 0);
-      rtx op1 = XEXP (x, 1);
-      rtx op2 = XEXP (x, 2);
-
-      if (GET_CODE (op0) == MULT
-	  && CONST_INT_P (op1)
-	  && op2 == const0_rtx
-	  && CONST_INT_P (XEXP (op0, 1))
-	  && aarch64_is_extend_from_extract (mode,
-					     XEXP (op0, 1),
-					     op1))
-	{
-	  return true;
-	}
-    }
+aarch64_rtx_arith_op_extract_p (rtx x)
+{
   /* The simple case <ARITH>, XD, XN, XM, [us]xt.
      No shift.  */
-  else if (GET_CODE (x) == SIGN_EXTEND
-	   || GET_CODE (x) == ZERO_EXTEND)
+  if (GET_CODE (x) == SIGN_EXTEND
+      || GET_CODE (x) == ZERO_EXTEND)
     return REG_P (XEXP (x, 0));
 
   return false;
@@ -12419,8 +12330,8 @@  cost_minus:
 	  }
 
 	/* Look for SUB (extended register).  */
-	if (is_a <scalar_int_mode> (mode, &int_mode)
-	    && aarch64_rtx_arith_op_extract_p (op1, int_mode))
+	if (is_a <scalar_int_mode> (mode)
+	    && aarch64_rtx_arith_op_extract_p (op1))
 	  {
 	    if (speed)
 	      *cost += extra_cost->alu.extend_arith;
@@ -12499,8 +12410,8 @@  cost_plus:
 	*cost += rtx_cost (op1, mode, PLUS, 1, speed);
 
 	/* Look for ADD (extended register).  */
-	if (is_a <scalar_int_mode> (mode, &int_mode)
-	    && aarch64_rtx_arith_op_extract_p (op0, int_mode))
+	if (is_a <scalar_int_mode> (mode)
+	    && aarch64_rtx_arith_op_extract_p (op0))
 	  {
 	    if (speed)
 	      *cost += extra_cost->alu.extend_arith;
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr96998.c b/gcc/testsuite/gcc.c-torture/compile/pr96998.c
new file mode 100644
index 00000000000..a75d5dcfe08
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr96998.c
@@ -0,0 +1,15 @@ 
+/* { dg-do compile { target arm*-*-* aarch64*-*-* } } */
+
+int h(void);
+struct c d;
+struct c {
+  int e[1];
+};
+
+void f(void) {
+  int g;
+  for (;; g = h()) {
+    int *i = &d.e[g];
+    asm("" : "=Q"(*i));
+  }
+}