Patchwork Fix for PR52081 - Missed tail merging with pure calls

login
register
mail settings
Submitter Tom de Vries
Date April 14, 2012, 5:53 a.m.
Message ID <4F8910F0.4010206@mentor.com>
Download mbox | patch
Permalink /patch/152475/
State New
Headers show

Comments

Tom de Vries - April 14, 2012, 5:53 a.m.
On 06/03/12 15:21, Richard Guenther wrote:
> On Mon, Feb 13, 2012 at 1:36 PM, Tom de Vries <Tom_deVries@mentor.com> wrote:
>> On 13/02/12 12:54, Richard Guenther wrote:
>>> On Thu, Feb 2, 2012 at 11:44 AM, Tom de Vries <Tom_deVries@mentor.com> wrote:
>>>> Richard,
>>>>
>>>> this patch fixes PR52801.
>>>>
>>>> Consider test-case pr51879-12.c:
>>>> ...
>>>> __attribute__((pure)) int bar (int);
>>>> __attribute__((pure)) int bar2 (int);
>>>> void baz (int);
>>>>
>>>> int x, z;
>>>>
>>>> void
>>>> foo (int y)
>>>> {
>>>>  int a = 0;
>>>>  if (y == 6)
>>>>    {
>>>>      a += bar (7);
>>>>      a += bar2 (6);
>>>>    }
>>>>  else
>>>>    {
>>>>      a += bar2 (6);
>>>>      a += bar (7);
>>>>    }
>>>>  baz (a);
>>>> }
>>>> ...
>>>>
>>>> When compiling at -O2, pr51879-12.c.094t.pre looks like this:
>>>> ...
>>>>  # BLOCK 3 freq:1991
>>>>  # PRED: 2 [19.9%]  (true,exec)
>>>>  # VUSE <.MEMD.1722_12(D)>
>>>>  # USE = nonlocal escaped
>>>>  D.1717_4 = barD.1703 (7);
>>>>  # VUSE <.MEMD.1722_12(D)>
>>>>  # USE = nonlocal escaped
>>>>  D.1718_6 = bar2D.1705 (6);
>>>>  aD.1713_7 = D.1717_4 + D.1718_6;
>>>>  goto <bb 5>;
>>>>  # SUCC: 5 [100.0%]  (fallthru,exec)
>>>>
>>>>  # BLOCK 4 freq:8009
>>>>  # PRED: 2 [80.1%]  (false,exec)
>>>>  # VUSE <.MEMD.1722_12(D)>
>>>>  # USE = nonlocal escaped
>>>>  D.1720_8 = bar2D.1705 (6);
>>>>  # VUSE <.MEMD.1722_12(D)>
>>>>  # USE = nonlocal escaped
>>>>  D.1721_10 = barD.1703 (7);
>>>>  aD.1713_11 = D.1720_8 + D.1721_10;
>>>>  # SUCC: 5 [100.0%]  (fallthru,exec)
>>>>
>>>>  # BLOCK 5 freq:10000
>>>>  # PRED: 3 [100.0%]  (fallthru,exec) 4 [100.0%]  (fallthru,exec)
>>>>  # aD.1713_1 = PHI <aD.1713_7(3), aD.1713_11(4)>
>>>>  # .MEMD.1722_13 = VDEF <.MEMD.1722_12(D)>
>>>>  # USE = nonlocal
>>>>  # CLB = nonlocal
>>>>  bazD.1707 (aD.1713_1);
>>>>  # VUSE <.MEMD.1722_13>
>>>>  return;
>>>> ...
>>>> block 3 and 4 can be tail-merged.
>>>>
>>>> Value numbering numbers the two phi arguments a_7 and a_11 the same so the
>>>> problem is not in value numbering:
>>>> ...
>>>> Setting value number of a_11 to a_7 (changed)
>>>> ...
>>>>
>>>> There are 2 reasons that tail_merge_optimize doesn't optimize this:
>>>>
>>>> 1.
>>>> The clause
>>>>  is_gimple_assign (stmt) && local_def (gimple_get_lhs (stmt))
>>>>  && !gimple_has_side_effects (stmt)
>>>> used in both same_succ_hash and gsi_advance_bw_nondebug_nonlocal evaluates to
>>>> false for pure calls.
>>>> This is fixed by replacing is_gimple_assign with gimple_has_lhs.
>>>>
>>>> 2.
>>>> In same_succ_equal we check gimples from the 2 bbs side-by-side:
>>>> ...
>>>>  gsi1 = gsi_start_nondebug_bb (bb1);
>>>>  gsi2 = gsi_start_nondebug_bb (bb2);
>>>>  while (!(gsi_end_p (gsi1) || gsi_end_p (gsi2)))
>>>>    {
>>>>      s1 = gsi_stmt (gsi1);
>>>>      s2 = gsi_stmt (gsi2);
>>>>      if (gimple_code (s1) != gimple_code (s2))
>>>>        return 0;
>>>>      if (is_gimple_call (s1) && !gimple_call_same_target_p (s1, s2))
>>>>        return 0;
>>>>      gsi_next_nondebug (&gsi1);
>>>>      gsi_next_nondebug (&gsi2);
>>>>    }
>>>> ...
>>>> and we'll be comparing 'bar (7)' and 'bar2 (6)', and gimple_call_same_target_p
>>>> will return false.
>>>> This is fixed by ignoring local defs in this comparison, by using
>>>> gsi_advance_fw_nondebug_nonlocal on the iterators.
>>>>
>>>> bootstrapped and reg-tested on x86_64.
>>>>
>>>> ok for stage1?
>>>
>>> Sorry for responding so late ...
>>
>> no problem :)
>>
>>> I think these fixes hint at that we should
>>> use "structural" equality as fallback if value-numbering doesn't equate
>>> two stmt effects.  Thus, treat two stmts with exactly the same operands
>>> and flags as equal and using value-numbering to canonicalize operands
>>> (when they are SSA names) for that comparison, or use VN entirely
>>> if there are no side-effects on the stmt.
>>>
>>> Changing value-numbering of virtual operands, even if it looks correct in the
>>> simple cases you change, doesn't look like a general solution for the missed
>>> tail merging opportunities.
>>>
>>
>> Your comment is relevant for the other recent tail-merge related fixes I
>> submitted, but I think not for this one.
>>
>> In this case, value-numbering manages to value number the 2 phi-alternatives
>> equal. It's tail-merging that doesn't take advantage of this, by treating pure
>> function calls the same as non-pure function calls. The fixes are therefore in
>> tail-merging, not in value numbering.
>>
>> So, ok for stage1?
> 
> I see.  A few comments.
> 
> +/* Returns whether VAL is used in the same bb as in which it is defined, or
> +   in the phi of a successor bb.  */
> +
> +static bool
> +local_def (tree val)
> +{
> +  gimple stmt, def_stmt;
> +  basic_block bb, def_bb;
> +  imm_use_iterator iter;
> +  bool res;
> +
> +  if (TREE_CODE (val) != SSA_NAME)
> +    return false;
> 
> what about SSA_NAME_IS_DEFAULT_DEF names?  They have a def-stmt
> with a NULL basic-block.

As you suggested below, I've inlined local_def into stmt_local_def, so val is
now initialized with DEF_FROM_PTR (SINGLE_SSA_DEF_OPERAND (stmt, SSA_OP_DEF)).
Which means that SSA_NAME_IS_DEFAULT_DEF (val) is false.

> 
> +  res = true;
> +  FOR_EACH_IMM_USE_STMT (stmt, iter, val)
> +    {
> +      bb = gimple_bb (stmt);
> +      if (bb == def_bb)
> +       continue;
> +      if (gimple_code (stmt) == GIMPLE_PHI
> +         && find_edge (def_bb, bb))
> +       continue;
> +      res = false;
> +      BREAK_FROM_IMM_USE_STMT (iter);
> 
> I'd use FOR_EACH_IMM_USE_FAST here, that should be faster (avoids
> the iterator marker writes),

Done.

> and find_edge can be replaced by
> PHI_ARG_INDEX_FROM_USE () - well, you get the edge index that way,
> so EDGE_PRED (def_bb, PHI_ARG_INDEX_FROM_USE (use_p)) == bb
> would be the condition to test.
> 

Done.

> local_def seems to be only used from stmt_local_def, consider
> inlining it there instead.  

Done.

> Btw, what about other DEFs of a stmt
> than the lhs?  I'd say you should use single_ssa_def_operand ()
> to get at the DEF, not gimple_get_lhs.
> 

Done.

> Ok with that changes.

Due to the fix for PR52734, I don't bother anymore about factoring out
commonality between gsi_advance_fw_nondebug_nonlocal and
gsi_advance_bw_nondebug_nonlocal, so the patch is a bit smaller now.

Committed to trunk and attached.

Thanks,
- Tom

> 
> Thanks,
> Richard.

2012-04-14  Tom de Vries  <tom@codesourcery.com>

	* tree-ssa-tail-merge.c (stmt_local_def): New function, factored out of
	same_succ_hash, with local_def inlined.  Use SINGLE_SSA_DEF_OPERAND.
	Use FOR_EACH_IMM_USE_FAST instead of FOR_EACH_IMM_USE_STMT.  Remove use
	of find_edge.
	(gsi_advance_fw_nondebug_nonlocal): New function.
	(local_def): Removed function.
	(same_succ_hash): Use stmt_local_def.
	(same_succ_equal): Use gsi_advance_fw_nondebug_nonlocal.
	(gsi_advance_bw_nondebug_nonlocal): Use stmt_local_def.

	* gcc.dg/pr51879-12.c: New test.

Patch

diff -u gcc/tree-ssa-tail-merge.c (working copy) gcc/tree-ssa-tail-merge.c (working copy)
--- gcc/tree-ssa-tail-merge.c (working copy)
+++ gcc/tree-ssa-tail-merge.c (working copy)
@@ -269,6 +269,67 @@ 
 #define BB_VOP_AT_EXIT(bb) (((struct aux_bb_info *)bb->aux)->vop_at_exit)
 #define BB_DEP_BB(bb) (((struct aux_bb_info *)bb->aux)->dep_bb)
 
+/* Returns true if the only effect a statement STMT has, is to define locally
+   used SSA_NAMEs.  */
+
+static bool
+stmt_local_def (gimple stmt)
+{
+  basic_block bb, def_bb;
+  imm_use_iterator iter;
+  use_operand_p use_p;
+  tree val;
+  def_operand_p def_p;
+
+  if (gimple_has_side_effects (stmt))
+    return false;
+
+  def_p = SINGLE_SSA_DEF_OPERAND (stmt, SSA_OP_DEF);
+  if (def_p == NULL)
+    return false;
+
+  val = DEF_FROM_PTR (def_p);
+  if (val == NULL_TREE || TREE_CODE (val) != SSA_NAME)
+    return false;
+
+  def_bb = gimple_bb (stmt);
+
+  FOR_EACH_IMM_USE_FAST (use_p, iter, val)
+    {
+      if (is_gimple_debug (USE_STMT (use_p)))
+	continue;
+      bb = gimple_bb (USE_STMT (use_p));
+      if (bb == def_bb)
+	continue;
+
+      if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI
+	  && EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src == def_bb)
+	continue;
+
+      return false;
+    }
+
+  return true;
+}
+
+/* Let GSI skip forwards over local defs.  */
+
+static void
+gsi_advance_fw_nondebug_nonlocal (gimple_stmt_iterator *gsi)
+{
+  gimple stmt;
+
+  while (true)
+    {
+      if (gsi_end_p (*gsi))
+	return;
+      stmt = gsi_stmt (*gsi);
+      if (!stmt_local_def (stmt))
+	return;
+	gsi_next_nondebug (gsi);
+    }
+}
+
 /* VAL1 and VAL2 are either:
    - uses in BB1 and BB2, or
    - phi alternatives for BB1 and BB2.
@@ -352,39 +413,6 @@ 
     update_dep_bb (gimple_bb (stmt), USE_FROM_PTR (use));
 }
 
-/* Returns whether VAL is used in the same bb as in which it is defined, or
-   in the phi of a successor bb.  */
-
-static bool
-local_def (tree val)
-{
-  gimple stmt, def_stmt;
-  basic_block bb, def_bb;
-  imm_use_iterator iter;
-  bool res;
-
-  if (TREE_CODE (val) != SSA_NAME)
-    return false;
-  def_stmt = SSA_NAME_DEF_STMT (val);
-  def_bb = gimple_bb (def_stmt);
-
-  res = true;
-  FOR_EACH_IMM_USE_STMT (stmt, iter, val)
-    {
-      if (is_gimple_debug (stmt))
-	continue;
-      bb = gimple_bb (stmt);
-      if (bb == def_bb)
-	continue;
-      if (gimple_code (stmt) == GIMPLE_PHI
-	  && find_edge (def_bb, bb))
-	continue;
-      res = false;
-      BREAK_FROM_IMM_USE_STMT (iter);
-    }
-  return res;
-}
-
 /* Calculates hash value for same_succ VE.  */
 
 static hashval_t
@@ -408,8 +436,7 @@ 
     {
       stmt = gsi_stmt (gsi);
       stmt_update_dep_bb (stmt);
-      if (is_gimple_assign (stmt) && local_def (gimple_get_lhs (stmt))
-	  && !gimple_has_side_effects (stmt))
+      if (stmt_local_def (stmt))
 	continue;
       size++;
 
@@ -525,6 +552,8 @@ 
 
   gsi1 = gsi_start_nondebug_bb (bb1);
   gsi2 = gsi_start_nondebug_bb (bb2);
+  gsi_advance_fw_nondebug_nonlocal (&gsi1);
+  gsi_advance_fw_nondebug_nonlocal (&gsi2);
   while (!(gsi_end_p (gsi1) || gsi_end_p (gsi2)))
     {
       s1 = gsi_stmt (gsi1);
@@ -535,6 +564,8 @@ 
 	return 0;
       gsi_next_nondebug (&gsi1);
       gsi_next_nondebug (&gsi2);
+      gsi_advance_fw_nondebug_nonlocal (&gsi1);
+      gsi_advance_fw_nondebug_nonlocal (&gsi2);
     }
 
   return 1;
@@ -1148,8 +1179,7 @@ 
 	    *vuse_escaped = true;
 	}
 
-      if (!(is_gimple_assign (stmt) && local_def (gimple_get_lhs (stmt))
-	    && !gimple_has_side_effects (stmt)))
+      if (!stmt_local_def (stmt))
 	return;
       gsi_prev_nondebug (gsi);
     }
--- /dev/null (new file)
+++ gcc/testsuite/gcc.dg/pr51879-12.c (revision 0)
@@ -0,0 +1,29 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-pre" } */
+
+__attribute__((pure)) int bar (int);
+__attribute__((pure)) int bar2 (int);
+void baz (int);
+
+int x, z;
+
+void
+foo (int y)
+{
+  int a = 0;
+  if (y == 6)
+    {
+      a += bar (7);
+      a += bar2 (6);
+    }
+  else
+    {
+      a += bar2 (6);
+      a += bar (7);
+    }
+  baz (a);
+}
+
+/* { dg-final { scan-tree-dump-times "bar \\(" 1 "pre"} } */
+/* { dg-final { scan-tree-dump-times "bar2 \\(" 1 "pre"} } */
+/* { dg-final { cleanup-tree-dump "pre" } } */