[RFC] fwprop address cost changes

Message ID 5f8d8706-9c2f-cb38-de23-f752cc0457b3@linux.ibm.com
State New
Headers show
Series
  • [RFC] fwprop address cost changes
Related show

Commit Message

Robin Dapp July 11, 2018, 8:13 a.m.
Hi,

we recently hit a problem where fwprop would not propagate a memory
address into an insn because our backend (s390) tells it that the
address_cost ()s for an address with index are higher than for one
without. Subsequently, should_replace_address () returns false and no
propagation is performed.

This checks seems to be just an early bail out since, when disabling it,
try_fwprop_subst () still checks src costs and would allow the
propagation. The problem is, though, that it relies on the newly
propagated-into insn already created before checking the costs so it
cannot be called at the same place should_replace_address () is being
called.

In this patch I quickly worked around this, adding an update flag to
try_fwprop_subst () that, when set to false, does not actually commit
the propagation but still checks costs. I'm sure there is a better and
much smaller way and I don't indent to apply this in its current state
(there's a lot of boilerplate code to keep default behavior) but it
might serve as basis for discussion/ideas. Richard mentioned the
insn_cost hook, but this would require the insn to exist as well.

Regards
 Robin

Patch

diff --git a/gcc/fwprop.c b/gcc/fwprop.c
index 0fca0f1edbc..6eeb77b93ed 100644
--- a/gcc/fwprop.c
+++ b/gcc/fwprop.c
@@ -392,7 +392,7 @@  canonicalize_address (rtx x)
 
 static bool
 should_replace_address (rtx old_rtx, rtx new_rtx, machine_mode mode,
-			addr_space_t as, bool speed)
+			addr_space_t as, bool speed, bool cost_addr)
 {
   int gain;
 
@@ -405,8 +405,11 @@  should_replace_address (rtx old_rtx, rtx new_rtx, machine_mode mode,
     return true;
 
   /* Prefer the new address if it is less expensive.  */
-  gain = (address_cost (old_rtx, mode, as, speed)
-	  - address_cost (new_rtx, mode, as, speed));
+  if (cost_addr)
+    gain = (address_cost (old_rtx, mode, as, speed)
+	    - address_cost (new_rtx, mode, as, speed));
+  else
+    gain = 0;
 
   /* If the addresses have equivalent cost, prefer the new address
      if it has the highest `set_src_cost'.  That has the potential of
@@ -448,6 +451,10 @@  enum {
   PR_OPTIMIZE_FOR_SPEED = 4
 };
 
+static bool
+try_fwprop_subst (df_ref use, rtx *loc, rtx new_rtx, rtx_insn *def_insn,
+		  bool set_reg_equal, bool update);
+
 
 /* Replace all occurrences of OLD in *PX with NEW and try to simplify the
    resulting expression.  Replace *PX with a new RTL expression if an
@@ -458,7 +465,11 @@  enum {
    that is because there is no simplify_gen_* function for LO_SUM).  */
 
 static bool
-propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
+propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags);
+
+static bool
+propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags,
+		 rtx *loc, df_ref use, rtx_insn *def_insn, bool set_reg_equal)
 {
   rtx x = *px, tem = NULL_RTX, op0, op1, op2;
   enum rtx_code code = GET_CODE (x);
@@ -491,7 +502,8 @@  propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
     case RTX_UNARY:
       op0 = XEXP (x, 0);
       op_mode = GET_MODE (op0);
-      valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
+      valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags,
+				    loc, use, def_insn, set_reg_equal);
       if (op0 == XEXP (x, 0))
 	return true;
       tem = simplify_gen_unary (code, mode, op0, op_mode);
@@ -501,8 +513,10 @@  propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
     case RTX_COMM_ARITH:
       op0 = XEXP (x, 0);
       op1 = XEXP (x, 1);
-      valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
-      valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
+      valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags,
+				    loc, use, def_insn, set_reg_equal);
+      valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags,
+				    loc, use, def_insn, set_reg_equal);
       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
 	return true;
       tem = simplify_gen_binary (code, mode, op0, op1);
@@ -513,8 +527,10 @@  propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
       op0 = XEXP (x, 0);
       op1 = XEXP (x, 1);
       op_mode = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
-      valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
-      valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
+      valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags,
+				    loc, use, def_insn, set_reg_equal);
+      valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags,
+				    loc, use, def_insn, set_reg_equal);
       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
 	return true;
       tem = simplify_gen_relational (code, mode, op_mode, op0, op1);
@@ -526,9 +542,12 @@  propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
       op1 = XEXP (x, 1);
       op2 = XEXP (x, 2);
       op_mode = GET_MODE (op0);
-      valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
-      valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
-      valid_ops &= propagate_rtx_1 (&op2, old_rtx, new_rtx, flags);
+      valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags,
+				    loc, use, def_insn, set_reg_equal);
+      valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags,
+				    loc, use, def_insn, set_reg_equal);
+      valid_ops &= propagate_rtx_1 (&op2, old_rtx, new_rtx, flags,
+				    loc, use, def_insn, set_reg_equal);
       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1) && op2 == XEXP (x, 2))
 	return true;
       if (op_mode == VOIDmode)
@@ -541,7 +560,8 @@  propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
       if (code == SUBREG)
 	{
           op0 = XEXP (x, 0);
-	  valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
+	  valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags,
+					loc, use, def_insn, set_reg_equal);
           if (op0 == XEXP (x, 0))
 	    return true;
 	  tem = simplify_gen_subreg (mode, op0, GET_MODE (SUBREG_REG (x)),
@@ -561,7 +581,8 @@  propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
 
 	  op0 = new_op0 = targetm.delegitimize_address (op0);
 	  valid_ops &= propagate_rtx_1 (&new_op0, old_rtx, new_rtx,
-					flags | PR_CAN_APPEAR);
+					flags | PR_CAN_APPEAR,
+					loc, use, def_insn, set_reg_equal);
 
 	  /* Dismiss transformation that we do not want to carry on.  */
 	  if (!valid_ops
@@ -572,12 +593,20 @@  propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
 
 	  canonicalize_address (new_op0);
 
+	  tem = replace_equiv_address_nv (x, new_op0);
+
+	  bool ok = try_fwprop_subst (use, loc, tem, def_insn,
+			    set_reg_equal, false);
+
 	  /* Copy propagations are always ok.  Otherwise check the costs.  */
-	  if (!(REG_P (old_rtx) && REG_P (new_rtx))
-	      && !should_replace_address (op0, new_op0, GET_MODE (x),
-					  MEM_ADDR_SPACE (x),
-	      			 	  flags & PR_OPTIMIZE_FOR_SPEED))
-	    return true;
+	  if (!(REG_P (old_rtx) && REG_P (new_rtx)))
+	      {
+		if (!should_replace_address (op0, new_op0, GET_MODE (x),
+					     MEM_ADDR_SPACE (x),
+					     flags & PR_OPTIMIZE_FOR_SPEED,
+					     !ok))
+		  return true;
+	      }
 
 	  tem = replace_equiv_address_nv (x, new_op0);
 	}
@@ -590,8 +619,10 @@  propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
 	  /* The only simplification we do attempts to remove references to op0
 	     or make it constant -- in both cases, op0's invalidity will not
 	     make the result invalid.  */
-	  propagate_rtx_1 (&op0, old_rtx, new_rtx, flags | PR_CAN_APPEAR);
-	  valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
+	  propagate_rtx_1 (&op0, old_rtx, new_rtx, flags | PR_CAN_APPEAR,
+			   loc, use, def_insn, set_reg_equal);
+	  valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags,
+					loc, use, def_insn, set_reg_equal);
           if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
 	    return true;
 
@@ -642,6 +673,13 @@  propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
   return valid_ops || can_appear || CONSTANT_P (tem);
 }
 
+static bool
+propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
+{
+  return propagate_rtx_1 (px, old_rtx, new_rtx, flags,
+		 NULL, NULL, NULL, false);
+}
+
 
 /* Return true if X constains a non-constant mem.  */
 
@@ -666,7 +704,8 @@  varying_mem_p (const_rtx x)
 
 static rtx
 propagate_rtx (rtx x, machine_mode mode, rtx old_rtx, rtx new_rtx,
-	       bool speed)
+	       bool speed, df_ref use, rtx_insn *def_insn,
+	       bool set_reg_equal)
 {
   rtx tem;
   bool collapsed;
@@ -689,7 +728,8 @@  propagate_rtx (rtx x, machine_mode mode, rtx old_rtx, rtx new_rtx,
     flags |= PR_OPTIMIZE_FOR_SPEED;
 
   tem = x;
-  collapsed = propagate_rtx_1 (&tem, old_rtx, copy_rtx (new_rtx), flags);
+  collapsed = propagate_rtx_1 (&tem, old_rtx, copy_rtx (new_rtx), flags,
+			       &x, use, def_insn, set_reg_equal);
   if (tem == x || !collapsed)
     return NULL_RTX;
 
@@ -706,6 +746,14 @@  propagate_rtx (rtx x, machine_mode mode, rtx old_rtx, rtx new_rtx,
   return tem;
 }
 
+static rtx
+propagate_rtx (rtx x, machine_mode mode, rtx old_rtx, rtx new_rtx,
+	       bool speed)
+{
+  return propagate_rtx (x, mode, old_rtx, new_rtx,
+			speed, NULL, NULL, false);
+}
+
 
 
 
@@ -950,7 +998,7 @@  update_df (rtx_insn *insn, rtx note)
 
 static bool
 try_fwprop_subst (df_ref use, rtx *loc, rtx new_rtx, rtx_insn *def_insn,
-		  bool set_reg_equal)
+		  bool set_reg_equal, bool update)
 {
   rtx_insn *insn = DF_REF_INSN (use);
   rtx set = single_set (insn);
@@ -1002,6 +1050,9 @@  try_fwprop_subst (df_ref use, rtx *loc, rtx new_rtx, rtx_insn *def_insn,
       ok = true;
     }
 
+  if (!update)
+    return ok;
+
   if (ok)
     {
       confirm_change_group ();
@@ -1045,6 +1096,14 @@  try_fwprop_subst (df_ref use, rtx *loc, rtx new_rtx, rtx_insn *def_insn,
   return ok;
 }
 
+static bool
+try_fwprop_subst (df_ref use, rtx *loc, rtx new_rtx, rtx_insn *def_insn,
+		  bool set_reg_equal)
+{
+  return try_fwprop_subst (use, loc, new_rtx, def_insn,
+		  set_reg_equal, true);
+}
+
 /* For the given single_set INSN, containing SRC known to be a
    ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN
    is redundant due to the register being set by a LOAD_EXTEND_OP
@@ -1353,7 +1412,8 @@  forward_propagate_and_simplify (df_ref use, rtx_insn *def_insn, rtx def_set)
     mode = GET_MODE (*loc);
 
   new_rtx = propagate_rtx (*loc, mode, reg, src,
-  			   optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn)));
+			   optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn)),
+			   use, def_insn, set_reg_equal);
 
   if (!new_rtx)
     return false;