diff mbox series

[1/2] aarch64: Remove support for extract-based addresses [PR96998]

Message ID 20201015085843.k6hwjoxd6ii3osyb@arm.com
State New
Headers show
Series [1/2] aarch64: Remove support for extract-based addresses [PR96998] | expand

Commit Message

Alex Coplan Oct. 15, 2020, 8:58 a.m. UTC
This patch fixes a bug in the AArch64 backend. Currently, we accept an
odd sign_extract representation of addresses, but don't accept that same
odd form of address as an LEA.

This is the cause of PR96998. In the testcase given in the PR, combine
produces:

(insn 9 8 10 3 (set (mem:SI (plus:DI (sign_extract:DI (mult:DI (subreg:DI (reg/v:SI 92 [ g ]) 0)
                        (const_int 4 [0x4]))
                    (const_int 34 [0x22])
                    (const_int 0 [0]))
                (reg/f:DI 96)) [3 *i_5+0 S4 A32])
        (asm_operands:SI ("") ("=Q") 0 []
             []
             [] test.c:11)) "test.c":11:5 -1
     (expr_list:REG_DEAD (reg/v:SI 92 [ g ])
        (nil)))

Then LRA reloads the address and we ICE because we fail to recognize the
sign_extract:

(insn 33 8 34 3 (set (reg:DI 100)
        (sign_extract:DI (ashift:DI (subreg:DI (reg/v:SI 92 [ g ]) 0)
                (const_int 2 [0x2]))
            (const_int 34 [0x22])
            (const_int 0 [0]))) "test.c":11:5 -1
     (nil))

This patch removes the support for this sign_extract representation of
addresses, fixing PR96998.

Now this by itself will cause code quality regressions. So this patch is
paired with a follow-on patch to combine that allows us to write this
sign_extract operation as an extend instead, which will allow the
combination to go ahead with the simpler canonical form.

For the testcase in the PR, prior to the ICE being introduced, we had
the following assembly for the body of the loop (at -O2):

.L2:
        add     x0, x19, x0, sxtw 2     // 39   [c=8 l=4] *add_extvdi_multp2
        bl      h               // 11   [c=4 l=4]  *call_value_insn/1
        b       .L2             // 54   [c=4 l=4]  jump

After this patch, the ICE is fixed, but we have the less optimal:

.L2:
        sxtw    x0, w0  // 8    [c=4 l=4]  *extendsidi2_aarch64/0
        add     x0, x19, x0, lsl 2      // 39   [c=8 l=4]  *add_lsl_di
        bl      h               // 11   [c=4 l=4]  *call_value_insn/1
        b       .L2             // 54   [c=4 l=4]  jump

The follow-on patch to combine will fix this code quality regression, as
well as the following two regressions introduced by this patch:

PASS->FAIL: gcc.target/aarch64/extend.c scan-assembler ldr\tw[0-9]+,.*sxtw #?2]
PASS->FAIL: gcc.target/aarch64/index.c scan-assembler-not [us]xtw\t

Bootstrapped and regtested on aarch64-none-linux-gnu.

OK for trunk (with follow-on patch to combine?)

Thanks,
Alex

---

gcc/ChangeLog:

	PR target/96998
	* config/aarch64/aarch64.c (aarch64_is_extend_from_extract): Delete.
	(aarch64_classify_index): Remove extract-based address handling.
	(aarch64_strip_extend): Likewise.
	(aarch64_rtx_arith_op_extract_p): Likewise, remove now-unsued parameter.
	Update callers...
	(aarch64_rtx_costs): ... here.

gcc/testsuite/ChangeLog:

	PR target/96998
	* gcc.c-torture/compile/pr96998.c: New test.

---
 gcc/config/aarch64/aarch64.c                  | 105 ++----------------
 gcc/testsuite/gcc.c-torture/compile/pr96998.c |  15 +++
 2 files changed, 23 insertions(+), 97 deletions(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr96998.c

Comments

Richard Sandiford Oct. 15, 2020, 11:16 a.m. UTC | #1
Alex Coplan <alex.coplan@arm.com> writes:
> @@ -11707,32 +11638,12 @@ aarch64_branch_cost (bool speed_p, bool predictable_p)
>  /* Return true if the RTX X in mode MODE is a zero or sign extract
>     usable in an ADD or SUB (extended register) instruction.  */
>  static bool
> -aarch64_rtx_arith_op_extract_p (rtx x, scalar_int_mode mode)
> -{
> -  /* Catch add with a sign extract.
> -     This is add_<optab><mode>_multp2.  */
> -  if (GET_CODE (x) == SIGN_EXTRACT
> -      || GET_CODE (x) == ZERO_EXTRACT)
> -    {
> -      rtx op0 = XEXP (x, 0);
> -      rtx op1 = XEXP (x, 1);
> -      rtx op2 = XEXP (x, 2);
> -
> -      if (GET_CODE (op0) == MULT
> -	  && CONST_INT_P (op1)
> -	  && op2 == const0_rtx
> -	  && CONST_INT_P (XEXP (op0, 1))
> -	  && aarch64_is_extend_from_extract (mode,
> -					     XEXP (op0, 1),
> -					     op1))
> -	{
> -	  return true;
> -	}
> -    }
> +aarch64_rtx_arith_op_extract_p (rtx x)
> +{
>    /* The simple case <ARITH>, XD, XN, XM, [us]xt.
>       No shift.  */
> -  else if (GET_CODE (x) == SIGN_EXTEND
> -	   || GET_CODE (x) == ZERO_EXTEND)
> +  if (GET_CODE (x) == SIGN_EXTEND
> +      || GET_CODE (x) == ZERO_EXTEND)
>      return REG_P (XEXP (x, 0));
>  
>    return false;

Didn't notice this last time, sorry, but:

s/the RTX X in mode MODE/X/

now that there's no longer a mode parameter.

OK with that change if 2/2 is approved.

Thanks,
Richard
diff mbox series

Patch

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index a8cc545c370..562ccd87ff7 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2886,33 +2886,6 @@  aarch64_is_noplt_call_p (rtx sym)
   return false;
 }
 
-/* Return true if the offsets to a zero/sign-extract operation
-   represent an expression that matches an extend operation.  The
-   operands represent the parameters from
-
-   (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
-bool
-aarch64_is_extend_from_extract (scalar_int_mode mode, rtx mult_imm,
-				rtx extract_imm)
-{
-  HOST_WIDE_INT mult_val, extract_val;
-
-  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
-    return false;
-
-  mult_val = INTVAL (mult_imm);
-  extract_val = INTVAL (extract_imm);
-
-  if (extract_val > 8
-      && extract_val < GET_MODE_BITSIZE (mode)
-      && exact_log2 (extract_val & ~7) > 0
-      && (extract_val & 7) <= 4
-      && mult_val == (1 << (extract_val & 7)))
-    return true;
-
-  return false;
-}
-
 /* Emit an insn that's a simple single-set.  Both the operands must be
    known to be valid.  */
 inline static rtx_insn *
@@ -8936,22 +8909,6 @@  aarch64_classify_index (struct aarch64_address_info *info, rtx x,
       index = XEXP (XEXP (x, 0), 0);
       shift = INTVAL (XEXP (x, 1));
     }
-  /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
-  else if ((GET_CODE (x) == SIGN_EXTRACT
-	    || GET_CODE (x) == ZERO_EXTRACT)
-	   && GET_MODE (x) == DImode
-	   && GET_CODE (XEXP (x, 0)) == MULT
-	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
-	   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
-    {
-      type = (GET_CODE (x) == SIGN_EXTRACT)
-	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
-      index = XEXP (XEXP (x, 0), 0);
-      shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
-      if (INTVAL (XEXP (x, 1)) != 32 + shift
-	  || INTVAL (XEXP (x, 2)) != 0)
-	shift = -1;
-    }
   /* (and:DI (mult:DI (reg:DI) (const_int scale))
      (const_int 0xffffffff<<shift)) */
   else if (GET_CODE (x) == AND
@@ -8967,22 +8924,6 @@  aarch64_classify_index (struct aarch64_address_info *info, rtx x,
       if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
 	shift = -1;
     }
-  /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
-  else if ((GET_CODE (x) == SIGN_EXTRACT
-	    || GET_CODE (x) == ZERO_EXTRACT)
-	   && GET_MODE (x) == DImode
-	   && GET_CODE (XEXP (x, 0)) == ASHIFT
-	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
-	   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
-    {
-      type = (GET_CODE (x) == SIGN_EXTRACT)
-	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
-      index = XEXP (XEXP (x, 0), 0);
-      shift = INTVAL (XEXP (XEXP (x, 0), 1));
-      if (INTVAL (XEXP (x, 1)) != 32 + shift
-	  || INTVAL (XEXP (x, 2)) != 0)
-	shift = -1;
-    }
   /* (and:DI (ashift:DI (reg:DI) (const_int shift))
      (const_int 0xffffffff<<shift)) */
   else if (GET_CODE (x) == AND
@@ -11360,16 +11301,6 @@  aarch64_strip_extend (rtx x, bool strip_shift)
   if (!is_a <scalar_int_mode> (GET_MODE (op), &mode))
     return op;
 
-  /* Zero and sign extraction of a widened value.  */
-  if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
-      && XEXP (op, 2) == const0_rtx
-      && GET_CODE (XEXP (op, 0)) == MULT
-      && aarch64_is_extend_from_extract (mode, XEXP (XEXP (op, 0), 1),
-					 XEXP (op, 1)))
-    return XEXP (XEXP (op, 0), 0);
-
-  /* It can also be represented (for zero-extend) as an AND with an
-     immediate.  */
   if (GET_CODE (op) == AND
       && GET_CODE (XEXP (op, 0)) == MULT
       && CONST_INT_P (XEXP (XEXP (op, 0), 1))
@@ -11707,32 +11638,12 @@  aarch64_branch_cost (bool speed_p, bool predictable_p)
 /* Return true if the RTX X in mode MODE is a zero or sign extract
    usable in an ADD or SUB (extended register) instruction.  */
 static bool
-aarch64_rtx_arith_op_extract_p (rtx x, scalar_int_mode mode)
-{
-  /* Catch add with a sign extract.
-     This is add_<optab><mode>_multp2.  */
-  if (GET_CODE (x) == SIGN_EXTRACT
-      || GET_CODE (x) == ZERO_EXTRACT)
-    {
-      rtx op0 = XEXP (x, 0);
-      rtx op1 = XEXP (x, 1);
-      rtx op2 = XEXP (x, 2);
-
-      if (GET_CODE (op0) == MULT
-	  && CONST_INT_P (op1)
-	  && op2 == const0_rtx
-	  && CONST_INT_P (XEXP (op0, 1))
-	  && aarch64_is_extend_from_extract (mode,
-					     XEXP (op0, 1),
-					     op1))
-	{
-	  return true;
-	}
-    }
+aarch64_rtx_arith_op_extract_p (rtx x)
+{
   /* The simple case <ARITH>, XD, XN, XM, [us]xt.
      No shift.  */
-  else if (GET_CODE (x) == SIGN_EXTEND
-	   || GET_CODE (x) == ZERO_EXTEND)
+  if (GET_CODE (x) == SIGN_EXTEND
+      || GET_CODE (x) == ZERO_EXTEND)
     return REG_P (XEXP (x, 0));
 
   return false;
@@ -12419,8 +12330,8 @@  cost_minus:
 	  }
 
 	/* Look for SUB (extended register).  */
-	if (is_a <scalar_int_mode> (mode, &int_mode)
-	    && aarch64_rtx_arith_op_extract_p (op1, int_mode))
+	if (is_a <scalar_int_mode> (mode)
+	    && aarch64_rtx_arith_op_extract_p (op1))
 	  {
 	    if (speed)
 	      *cost += extra_cost->alu.extend_arith;
@@ -12499,8 +12410,8 @@  cost_plus:
 	*cost += rtx_cost (op1, mode, PLUS, 1, speed);
 
 	/* Look for ADD (extended register).  */
-	if (is_a <scalar_int_mode> (mode, &int_mode)
-	    && aarch64_rtx_arith_op_extract_p (op0, int_mode))
+	if (is_a <scalar_int_mode> (mode)
+	    && aarch64_rtx_arith_op_extract_p (op0))
 	  {
 	    if (speed)
 	      *cost += extra_cost->alu.extend_arith;
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr96998.c b/gcc/testsuite/gcc.c-torture/compile/pr96998.c
new file mode 100644
index 00000000000..a75d5dcfe08
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr96998.c
@@ -0,0 +1,15 @@ 
+/* { dg-do compile { target arm*-*-* aarch64*-*-* } } */
+
+int h(void);
+struct c d;
+struct c {
+  int e[1];
+};
+
+void f(void) {
+  int g;
+  for (;; g = h()) {
+    int *i = &d.e[g];
+    asm("" : "=Q"(*i));
+  }
+}