diff mbox

[aarch64] Fix 70048

Message ID 56E9CF47.5040806@redhat.com
State New
Headers show

Commit Message

Richard Henderson March 16, 2016, 9:25 p.m. UTC
This fixes only the regression described in the PR.

There was quite a bit of follow-up that points to new work that ought to be 
done during the gcc7 cycle, but isn't really appropriate now.

Tested on aarch64-linux; committed as reviewed in the PR.


r~
PR target/70048
        * config/aarch64/aarch64.c (virt_or_elim_regno_p): New.
        (aarch64_classify_address): Use it.
        (aarch64_legitimize_address): Force all subexpressions of PLUS
        into registers.  Simplify as (sfp+const)+reg or (reg+reg)+const.

Comments

James Greenhalgh March 17, 2016, 12:33 p.m. UTC | #1
On Wed, Mar 16, 2016 at 02:25:27PM -0700, Richard Henderson wrote:
>         PR target/70048
>         * config/aarch64/aarch64.c (virt_or_elim_regno_p): New.
>         (aarch64_classify_address): Use it.
>         (aarch64_legitimize_address): Force all subexpressions of PLUS
>         into registers.  Simplify as (sfp+const)+reg or (reg+reg)+const.
> 
> 
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index cf1239d..12e498d 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -3847,6 +3847,18 @@ aarch64_mode_valid_for_sched_fusion_p (machine_mode mode)
>  	     && GET_MODE_SIZE (mode) == 8);
>  }
>  
> +/* Return true if REGNO is a virtual pointer register, or an eliminable
> +   "soft" frame register.  Like REGNO_PTR_FRAME_P except that we don't
> +   include stack_pointer or hard_frame_pointer.  */

In that case, do we want to write this as:

  return REGNO_PTR_FRAME_P (regno)
	 && regno != STACK_POINTER_REGNUM
	 && regno != HARD_FRAME_POINTER_REGNUM;

for clarity?

> +static bool
> +virt_or_elim_regno_p (unsigned regno)

Most functions in here get the "aarch64" in their name even if they are
static.

> +{
> +  return ((regno >= FIRST_VIRTUAL_REGISTER
> +	   && regno <= LAST_VIRTUAL_POINTER_REGISTER)
> +	  || regno == FRAME_POINTER_REGNUM
> +	  || regno == ARG_POINTER_REGNUM);
> +}

Otherwise, this looks OK to me. Thanks for the fix.

James
diff mbox

Patch

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index cf1239d..12e498d 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3847,6 +3847,18 @@  aarch64_mode_valid_for_sched_fusion_p (machine_mode mode)
 	     && GET_MODE_SIZE (mode) == 8);
 }
 
+/* Return true if REGNO is a virtual pointer register, or an eliminable
+   "soft" frame register.  Like REGNO_PTR_FRAME_P except that we don't
+   include stack_pointer or hard_frame_pointer.  */
+static bool
+virt_or_elim_regno_p (unsigned regno)
+{
+  return ((regno >= FIRST_VIRTUAL_REGISTER
+	   && regno <= LAST_VIRTUAL_POINTER_REGISTER)
+	  || regno == FRAME_POINTER_REGNUM
+	  || regno == ARG_POINTER_REGNUM);
+}
+
 /* Return true if X is a valid address for machine mode MODE.  If it is,
    fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
    effect.  OUTER_CODE is PARALLEL for a load/store pair.  */
@@ -3890,9 +3902,7 @@  aarch64_classify_address (struct aarch64_address_info *info,
 
       if (! strict_p
 	  && REG_P (op0)
-	  && (op0 == virtual_stack_vars_rtx
-	      || op0 == frame_pointer_rtx
-	      || op0 == arg_pointer_rtx)
+	  && virt_or_elim_regno_p (REGNO (op0))
 	  && CONST_INT_P (op1))
 	{
 	  info->type = ADDRESS_REG_IMM;
@@ -4953,74 +4963,43 @@  aarch64_legitimize_address (rtx x, rtx /* orig_x  */, machine_mode mode)
 
   if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
     {
-      HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
-      HOST_WIDE_INT base_offset;
+      rtx base = XEXP (x, 0);
+      rtx offset_rtx XEXP (x, 1);
+      HOST_WIDE_INT offset = INTVAL (offset_rtx);
 
-      if (GET_CODE (XEXP (x, 0)) == PLUS)
+      if (GET_CODE (base) == PLUS)
 	{
-	  rtx op0 = XEXP (XEXP (x, 0), 0);
-	  rtx op1 = XEXP (XEXP (x, 0), 1);
+	  rtx op0 = XEXP (base, 0);
+	  rtx op1 = XEXP (base, 1);
 
-	  /* Address expressions of the form Ra + Rb + CONST.
+	  /* Force any scaling into a temp for CSE.  */
+	  op0 = force_reg (Pmode, op0);
+	  op1 = force_reg (Pmode, op1);
 
-	     If CONST is within the range supported by the addressing
-	     mode "reg+offset", do not split CONST and use the
-	     sequence
-	       Rt = Ra + Rb;
-	       addr = Rt + CONST.  */
-	  if (REG_P (op0) && REG_P (op1))
-	    {
-	      machine_mode addr_mode = GET_MODE (x);
-	      rtx base = gen_reg_rtx (addr_mode);
-	      rtx addr = plus_constant (addr_mode, base, offset);
+	  /* Let the pointer register be in op0.  */
+	  if (REG_POINTER (op1))
+	    std::swap (op0, op1);
 
-	      if (aarch64_legitimate_address_hook_p (mode, addr, false))
-		{
-		  emit_insn (gen_adddi3 (base, op0, op1));
-		  return addr;
-		}
-	    }
-	  /* Address expressions of the form Ra + Rb<<SCALE + CONST.
-
-	     If Reg + Rb<<SCALE is a valid address expression, do not
-	     split CONST and use the sequence
-	       Rc = CONST;
-	       Rt = Ra + Rc;
-	       addr = Rt + Rb<<SCALE.
-
-	     TODO: We really should split CONST out of memory referece
-	     because:
-	       a) We depend on GIMPLE optimizers to pick up common sub
-		  expression involving the scaling operation.
-	       b) The index Rb is likely a loop iv, it's better to split
-		  the CONST so that computation of new base Rt is a loop
-		  invariant and can be moved out of loop.  This is more
-		  important when the original base Ra is sfp related.
-
-	     Unfortunately, GIMPLE optimizers (e.g., SLSR) can not handle
-	     this kind of CSE opportunity at the time of this change, we
-	     have to force register scaling expr out of memory ref now.  */
-	  else if (REG_P (op0) || REG_P (op1))
+	  /* If the pointer is virtual or frame related, then we know that
+	     virtual register instantiation or register elimination is going
+	     to apply a second constant.  We want the two constants folded
+	     together easily.  Therefore, emit as (OP0 + CONST) + OP1.  */
+	  if (virt_or_elim_regno_p (REGNO (op0)))
 	    {
-	      machine_mode addr_mode = GET_MODE (x);
-	      rtx base = gen_reg_rtx (addr_mode);
-
-	      /* Switch to make sure that register is in op0.  */
-	      if (REG_P (op1))
-		std::swap (op0, op1);
-
-	      rtx addr = plus_constant (addr_mode, base, offset);
-
-	      if (aarch64_legitimate_address_hook_p (mode, addr, false))
-		{
-		  base = force_operand (gen_rtx_PLUS (addr_mode, op1, op0),
-					NULL_RTX);
-		  return plus_constant (addr_mode, base, offset);
-		}
+	      base = expand_binop (Pmode, add_optab, op0, offset_rtx,
+				   NULL_RTX, true, OPTAB_DIRECT);
+	      return gen_rtx_PLUS (Pmode, base, op1);
 	    }
+
+	  /* Otherwise, in order to encourage CSE (and thence loop strength
+	     reduce) scaled addresses, emit as (OP0 + OP1) + CONST.  */
+	  base = expand_binop (Pmode, add_optab, op0, op1,
+			       NULL_RTX, true, OPTAB_DIRECT);
+	  x = gen_rtx_PLUS (Pmode, base, offset_rtx);
 	}
 
       /* Does it look like we'll need a load/store-pair operation?  */
+      HOST_WIDE_INT base_offset;
       if (GET_MODE_SIZE (mode) > 16
 	  || mode == TImode)
 	base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
@@ -5032,15 +5011,12 @@  aarch64_legitimize_address (rtx x, rtx /* orig_x  */, machine_mode mode)
       else
 	base_offset = offset & ~0xfff;
 
-      if (base_offset == 0)
-	return x;
-
-      offset -= base_offset;
-      rtx base_reg = gen_reg_rtx (Pmode);
-      rtx val = force_operand (plus_constant (Pmode, XEXP (x, 0), base_offset),
-			   NULL_RTX);
-      emit_move_insn (base_reg, val);
-      x = plus_constant (Pmode, base_reg, offset);
+      if (base_offset != 0)
+	{
+	  base = plus_constant (Pmode, base, base_offset);
+	  base = force_operand (base, NULL_RTX);
+	  return plus_constant (Pmode, base, offset - base_offset);
+	}
     }
 
   return x;