diff mbox

Make PRE/FRE elimination not do useless work

Message ID alpine.LSU.2.20.1705101618310.20726@zhemvz.fhfr.qr
State New
Headers show

Commit Message

Richard Biener May 10, 2017, 2:20 p.m. UTC
So this is a patch that makes skipping unreachable code when
doing elimination possible.  Previously interesting interactions
with tail-merging made this impossible, now I seem to have
figured a way around this.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

With this more elaborate stmt folding during elimination might
be in reach.

Richard.

2017-05-10  Richard Biener  <rguenther@suse.de>

	* tree-ssa-pre.c (eliminate_dom_walker::before_dom_children):
	Skip unreachable blocks and destinations.
	(eliminate): Move stmt removal and fixup ...
	(fini_eliminate): ... here.  Skip inserted exprs.
	(pass_pre::execute): Move fini_pre after fini_eliminate.
	* tree-ssa-tailmerge.c: Include tree-cfgcleanup.h.
	(tail_merge_optimize): Run cleanup_tree_cfg if requested by
	PRE to get rid of dead code that has invalid SSA form and
	split critical edges again.

Comments

Christophe Lyon May 11, 2017, 3:08 p.m. UTC | #1
Hi Richard,


On 10 May 2017 at 16:20, Richard Biener <rguenther@suse.de> wrote:
>
> So this is a patch that makes skipping unreachable code when
> doing elimination possible.  Previously interesting interactions
> with tail-merging made this impossible, now I seem to have
> figured a way around this.
>
> Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
>
> With this more elaborate stmt folding during elimination might
> be in reach.
>
> Richard.
>
> 2017-05-10  Richard Biener  <rguenther@suse.de>
>
>         * tree-ssa-pre.c (eliminate_dom_walker::before_dom_children):
>         Skip unreachable blocks and destinations.
>         (eliminate): Move stmt removal and fixup ...
>         (fini_eliminate): ... here.  Skip inserted exprs.
>         (pass_pre::execute): Move fini_pre after fini_eliminate.
>         * tree-ssa-tailmerge.c: Include tree-cfgcleanup.h.
>         (tail_merge_optimize): Run cleanup_tree_cfg if requested by
>         PRE to get rid of dead code that has invalid SSA form and
>         split critical edges again.
>
> Index: gcc/tree-ssa-pre.c
> ===================================================================
> --- gcc/tree-ssa-pre.c  (revision 247831)
> +++ gcc/tree-ssa-pre.c  (working copy)
> @@ -4196,9 +4196,14 @@ eliminate_dom_walker::before_dom_childre
>    /* Mark new bb.  */
>    el_avail_stack.safe_push (NULL_TREE);
>
> -  /* ???  If we do nothing for unreachable blocks then this will confuse
> -     tailmerging.  Eventually we can reduce its reliance on SCCVN now
> -     that we fully copy/constant-propagate (most) things.  */
> +  /* Skip unreachable blocks marked unreachable during the SCCVN domwalk.  */
> +  edge_iterator ei;
> +  edge e;
> +  FOR_EACH_EDGE (e, ei, b->preds)
> +    if (e->flags & EDGE_EXECUTABLE)
> +      break;
> +  if (! e)
> +    return NULL;
>
>    for (gphi_iterator gsi = gsi_start_phis (b); !gsi_end_p (gsi);)
>      {
> @@ -4695,10 +4700,8 @@ eliminate_dom_walker::before_dom_childre
>      }
>
>    /* Replace destination PHI arguments.  */
> -  edge_iterator ei;
> -  edge e;
>    FOR_EACH_EDGE (e, ei, b->succs)
> -    {
> +    if (e->flags & EDGE_EXECUTABLE)
>        for (gphi_iterator gsi = gsi_start_phis (e->dest);
>            !gsi_end_p (gsi);
>            gsi_next (&gsi))
> @@ -4717,7 +4720,6 @@ eliminate_dom_walker::before_dom_childre
>                 gimple_set_plf (SSA_NAME_DEF_STMT (sprime), NECESSARY, true);
>             }
>         }
> -    }
>    return NULL;
>  }
>
> @@ -4743,9 +4745,6 @@ eliminate_dom_walker::after_dom_children
>  static unsigned int
>  eliminate (bool do_pre)
>  {
> -  gimple_stmt_iterator gsi;
> -  gimple *stmt;
> -
>    need_eh_cleanup = BITMAP_ALLOC (NULL);
>    need_ab_cleanup = BITMAP_ALLOC (NULL);
>
> @@ -4761,6 +4760,18 @@ eliminate (bool do_pre)
>    el_avail.release ();
>    el_avail_stack.release ();
>
> +  return el_todo;
> +}
> +
> +/* Perform CFG cleanups made necessary by elimination.  */
> +
> +static unsigned
> +fini_eliminate (void)
> +{
> +  gimple_stmt_iterator gsi;
> +  gimple *stmt;
> +  unsigned todo = 0;
> +
>    /* We cannot remove stmts during BB walk, especially not release SSA
>       names there as this confuses the VN machinery.  The stmts ending
>       up in el_to_remove are either stores or simple copies.
> @@ -4782,8 +4793,9 @@ eliminate (bool do_pre)
>         lhs = gimple_get_lhs (stmt);
>
>        if (inserted_exprs
> -         && TREE_CODE (lhs) == SSA_NAME)
> -       bitmap_clear_bit (inserted_exprs, SSA_NAME_VERSION (lhs));
> +         && TREE_CODE (lhs) == SSA_NAME
> +         && bitmap_bit_p (inserted_exprs, SSA_NAME_VERSION (lhs)))
> +       continue;
>
>        gsi = gsi_for_stmt (stmt);
>        if (gimple_code (stmt) == GIMPLE_PHI)
> @@ -4800,7 +4812,7 @@ eliminate (bool do_pre)
>         }
>
>        /* Removing a stmt may expose a forwarder block.  */
> -      el_todo |= TODO_cleanup_cfg;
> +      todo |= TODO_cleanup_cfg;
>      }
>    el_to_remove.release ();
>
> @@ -4819,18 +4831,10 @@ eliminate (bool do_pre)
>         }
>
>        if (fixup_noreturn_call (stmt))
> -       el_todo |= TODO_cleanup_cfg;
> +       todo |= TODO_cleanup_cfg;
>      }
>    el_to_fixup.release ();
>
> -  return el_todo;
> -}
> -
> -/* Perform CFG cleanups made necessary by elimination.  */
> -
> -static unsigned
> -fini_eliminate (void)
> -{
>    bool do_eh_cleanup = !bitmap_empty_p (need_eh_cleanup);
>    bool do_ab_cleanup = !bitmap_empty_p (need_ab_cleanup);
>
> @@ -4844,8 +4848,8 @@ fini_eliminate (void)
>    BITMAP_FREE (need_ab_cleanup);
>
>    if (do_eh_cleanup || do_ab_cleanup)
> -    return TODO_cleanup_cfg;
> -  return 0;
> +    todo |= TODO_cleanup_cfg;
> +  return todo;
>  }
>
>  /* Borrow a bit of tree-ssa-dce.c for the moment.
> @@ -5110,8 +5114,8 @@ pass_pre::execute (function *fun)
>    remove_dead_inserted_code ();
>
>    scev_finalize ();
> -  fini_pre ();
>    todo |= fini_eliminate ();
> +  fini_pre ();
>    loop_optimizer_finalize ();
>
>    /* Restore SSA info before tail-merging as that resets it as well.  */
> Index: gcc/tree-ssa-tail-merge.c
> ===================================================================
> --- gcc/tree-ssa-tail-merge.c   (revision 247831)
> +++ gcc/tree-ssa-tail-merge.c   (working copy)
> @@ -205,6 +205,7 @@ along with GCC; see the file COPYING3.
>  #include "tree-ssa-sccvn.h"
>  #include "cfgloop.h"
>  #include "tree-eh.h"
> +#include "tree-cfgcleanup.h"
>
>  /* Describes a group of bbs with the same successors.  The successor bbs are
>     cached in succs, and the successor edge flags are cached in succ_flags.
> @@ -1717,6 +1718,16 @@ tail_merge_optimize (unsigned int todo)
>
>    timevar_push (TV_TREE_TAIL_MERGE);
>
> +  /* We enter from PRE which has critical edges split.  Elimination
> +     does not process trivially dead code so cleanup the CFG if we
> +     are told so.  And re-split critical edges then.  */
> +  if (todo & TODO_cleanup_cfg)
> +    {
> +      cleanup_tree_cfg ();
> +      todo &= ~TODO_cleanup_cfg;
> +      split_critical_edges ();
> +    }
> +
>    if (!dom_info_available_p (CDI_DOMINATORS))
>      {
>        /* PRE can leave us with unreachable blocks, remove them now.  */


This patch (r247882) causes regression on:
    gcc.dg/pr46309.c scan-tree-dump-times reassoc2 "Optimizing range
tests [^\r\n]*_[0-9]* -.0, 31. and -.128, 159.[\n\r]* into" 1
on arm-linux-gnueabihf when configured --with-cpu=cortex-a5
--wtih-fpu=vfpv3-d16-fp16

(OK with cortex-a9 for instance)

Christophe
Richard Biener May 12, 2017, 9:09 a.m. UTC | #2
On Thu, 11 May 2017, Christophe Lyon wrote:

> Hi Richard,
> 
> 
> On 10 May 2017 at 16:20, Richard Biener <rguenther@suse.de> wrote:
> >
> > So this is a patch that makes skipping unreachable code when
> > doing elimination possible.  Previously interesting interactions
> > with tail-merging made this impossible, now I seem to have
> > figured a way around this.
> >
> > Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
> >
> > With this more elaborate stmt folding during elimination might
> > be in reach.
> >
> > Richard.
> >
> > 2017-05-10  Richard Biener  <rguenther@suse.de>
> >
> >         * tree-ssa-pre.c (eliminate_dom_walker::before_dom_children):
> >         Skip unreachable blocks and destinations.
> >         (eliminate): Move stmt removal and fixup ...
> >         (fini_eliminate): ... here.  Skip inserted exprs.
> >         (pass_pre::execute): Move fini_pre after fini_eliminate.
> >         * tree-ssa-tailmerge.c: Include tree-cfgcleanup.h.
> >         (tail_merge_optimize): Run cleanup_tree_cfg if requested by
> >         PRE to get rid of dead code that has invalid SSA form and
> >         split critical edges again.
> >
> > Index: gcc/tree-ssa-pre.c
> > ===================================================================
> > --- gcc/tree-ssa-pre.c  (revision 247831)
> > +++ gcc/tree-ssa-pre.c  (working copy)
> > @@ -4196,9 +4196,14 @@ eliminate_dom_walker::before_dom_childre
> >    /* Mark new bb.  */
> >    el_avail_stack.safe_push (NULL_TREE);
> >
> > -  /* ???  If we do nothing for unreachable blocks then this will confuse
> > -     tailmerging.  Eventually we can reduce its reliance on SCCVN now
> > -     that we fully copy/constant-propagate (most) things.  */
> > +  /* Skip unreachable blocks marked unreachable during the SCCVN domwalk.  */
> > +  edge_iterator ei;
> > +  edge e;
> > +  FOR_EACH_EDGE (e, ei, b->preds)
> > +    if (e->flags & EDGE_EXECUTABLE)
> > +      break;
> > +  if (! e)
> > +    return NULL;
> >
> >    for (gphi_iterator gsi = gsi_start_phis (b); !gsi_end_p (gsi);)
> >      {
> > @@ -4695,10 +4700,8 @@ eliminate_dom_walker::before_dom_childre
> >      }
> >
> >    /* Replace destination PHI arguments.  */
> > -  edge_iterator ei;
> > -  edge e;
> >    FOR_EACH_EDGE (e, ei, b->succs)
> > -    {
> > +    if (e->flags & EDGE_EXECUTABLE)
> >        for (gphi_iterator gsi = gsi_start_phis (e->dest);
> >            !gsi_end_p (gsi);
> >            gsi_next (&gsi))
> > @@ -4717,7 +4720,6 @@ eliminate_dom_walker::before_dom_childre
> >                 gimple_set_plf (SSA_NAME_DEF_STMT (sprime), NECESSARY, true);
> >             }
> >         }
> > -    }
> >    return NULL;
> >  }
> >
> > @@ -4743,9 +4745,6 @@ eliminate_dom_walker::after_dom_children
> >  static unsigned int
> >  eliminate (bool do_pre)
> >  {
> > -  gimple_stmt_iterator gsi;
> > -  gimple *stmt;
> > -
> >    need_eh_cleanup = BITMAP_ALLOC (NULL);
> >    need_ab_cleanup = BITMAP_ALLOC (NULL);
> >
> > @@ -4761,6 +4760,18 @@ eliminate (bool do_pre)
> >    el_avail.release ();
> >    el_avail_stack.release ();
> >
> > +  return el_todo;
> > +}
> > +
> > +/* Perform CFG cleanups made necessary by elimination.  */
> > +
> > +static unsigned
> > +fini_eliminate (void)
> > +{
> > +  gimple_stmt_iterator gsi;
> > +  gimple *stmt;
> > +  unsigned todo = 0;
> > +
> >    /* We cannot remove stmts during BB walk, especially not release SSA
> >       names there as this confuses the VN machinery.  The stmts ending
> >       up in el_to_remove are either stores or simple copies.
> > @@ -4782,8 +4793,9 @@ eliminate (bool do_pre)
> >         lhs = gimple_get_lhs (stmt);
> >
> >        if (inserted_exprs
> > -         && TREE_CODE (lhs) == SSA_NAME)
> > -       bitmap_clear_bit (inserted_exprs, SSA_NAME_VERSION (lhs));
> > +         && TREE_CODE (lhs) == SSA_NAME
> > +         && bitmap_bit_p (inserted_exprs, SSA_NAME_VERSION (lhs)))
> > +       continue;
> >
> >        gsi = gsi_for_stmt (stmt);
> >        if (gimple_code (stmt) == GIMPLE_PHI)
> > @@ -4800,7 +4812,7 @@ eliminate (bool do_pre)
> >         }
> >
> >        /* Removing a stmt may expose a forwarder block.  */
> > -      el_todo |= TODO_cleanup_cfg;
> > +      todo |= TODO_cleanup_cfg;
> >      }
> >    el_to_remove.release ();
> >
> > @@ -4819,18 +4831,10 @@ eliminate (bool do_pre)
> >         }
> >
> >        if (fixup_noreturn_call (stmt))
> > -       el_todo |= TODO_cleanup_cfg;
> > +       todo |= TODO_cleanup_cfg;
> >      }
> >    el_to_fixup.release ();
> >
> > -  return el_todo;
> > -}
> > -
> > -/* Perform CFG cleanups made necessary by elimination.  */
> > -
> > -static unsigned
> > -fini_eliminate (void)
> > -{
> >    bool do_eh_cleanup = !bitmap_empty_p (need_eh_cleanup);
> >    bool do_ab_cleanup = !bitmap_empty_p (need_ab_cleanup);
> >
> > @@ -4844,8 +4848,8 @@ fini_eliminate (void)
> >    BITMAP_FREE (need_ab_cleanup);
> >
> >    if (do_eh_cleanup || do_ab_cleanup)
> > -    return TODO_cleanup_cfg;
> > -  return 0;
> > +    todo |= TODO_cleanup_cfg;
> > +  return todo;
> >  }
> >
> >  /* Borrow a bit of tree-ssa-dce.c for the moment.
> > @@ -5110,8 +5114,8 @@ pass_pre::execute (function *fun)
> >    remove_dead_inserted_code ();
> >
> >    scev_finalize ();
> > -  fini_pre ();
> >    todo |= fini_eliminate ();
> > +  fini_pre ();
> >    loop_optimizer_finalize ();
> >
> >    /* Restore SSA info before tail-merging as that resets it as well.  */
> > Index: gcc/tree-ssa-tail-merge.c
> > ===================================================================
> > --- gcc/tree-ssa-tail-merge.c   (revision 247831)
> > +++ gcc/tree-ssa-tail-merge.c   (working copy)
> > @@ -205,6 +205,7 @@ along with GCC; see the file COPYING3.
> >  #include "tree-ssa-sccvn.h"
> >  #include "cfgloop.h"
> >  #include "tree-eh.h"
> > +#include "tree-cfgcleanup.h"
> >
> >  /* Describes a group of bbs with the same successors.  The successor bbs are
> >     cached in succs, and the successor edge flags are cached in succ_flags.
> > @@ -1717,6 +1718,16 @@ tail_merge_optimize (unsigned int todo)
> >
> >    timevar_push (TV_TREE_TAIL_MERGE);
> >
> > +  /* We enter from PRE which has critical edges split.  Elimination
> > +     does not process trivially dead code so cleanup the CFG if we
> > +     are told so.  And re-split critical edges then.  */
> > +  if (todo & TODO_cleanup_cfg)
> > +    {
> > +      cleanup_tree_cfg ();
> > +      todo &= ~TODO_cleanup_cfg;
> > +      split_critical_edges ();
> > +    }
> > +
> >    if (!dom_info_available_p (CDI_DOMINATORS))
> >      {
> >        /* PRE can leave us with unreachable blocks, remove them now.  */
> 
> 
> This patch (r247882) causes regression on:
>     gcc.dg/pr46309.c scan-tree-dump-times reassoc2 "Optimizing range
> tests [^\r\n]*_[0-9]* -.0, 31. and -.128, 159.[\n\r]* into" 1
> on arm-linux-gnueabihf when configured --with-cpu=cortex-a5
> --wtih-fpu=vfpv3-d16-fp16
> 
> (OK with cortex-a9 for instance)

Huh, that's weird.  It shouldn't have any code-gen effect (ok, maybe
it causes additional tail-merging to happen, but looking at the
testcase I can't see any such opportunities).

Will have to try reproduce with a cross - I'm currently testing
another patch for fallout of the above change (PR80713).

Richard.
diff mbox

Patch

Index: gcc/tree-ssa-pre.c
===================================================================
--- gcc/tree-ssa-pre.c	(revision 247831)
+++ gcc/tree-ssa-pre.c	(working copy)
@@ -4196,9 +4196,14 @@  eliminate_dom_walker::before_dom_childre
   /* Mark new bb.  */
   el_avail_stack.safe_push (NULL_TREE);
 
-  /* ???  If we do nothing for unreachable blocks then this will confuse
-     tailmerging.  Eventually we can reduce its reliance on SCCVN now
-     that we fully copy/constant-propagate (most) things.  */
+  /* Skip unreachable blocks marked unreachable during the SCCVN domwalk.  */
+  edge_iterator ei;
+  edge e;
+  FOR_EACH_EDGE (e, ei, b->preds)
+    if (e->flags & EDGE_EXECUTABLE)
+      break;
+  if (! e)
+    return NULL;
 
   for (gphi_iterator gsi = gsi_start_phis (b); !gsi_end_p (gsi);)
     {
@@ -4695,10 +4700,8 @@  eliminate_dom_walker::before_dom_childre
     }
 
   /* Replace destination PHI arguments.  */
-  edge_iterator ei;
-  edge e;
   FOR_EACH_EDGE (e, ei, b->succs)
-    {
+    if (e->flags & EDGE_EXECUTABLE)
       for (gphi_iterator gsi = gsi_start_phis (e->dest);
 	   !gsi_end_p (gsi);
 	   gsi_next (&gsi))
@@ -4717,7 +4720,6 @@  eliminate_dom_walker::before_dom_childre
 		gimple_set_plf (SSA_NAME_DEF_STMT (sprime), NECESSARY, true);
 	    }
 	}
-    }
   return NULL;
 }
 
@@ -4743,9 +4745,6 @@  eliminate_dom_walker::after_dom_children
 static unsigned int
 eliminate (bool do_pre)
 {
-  gimple_stmt_iterator gsi;
-  gimple *stmt;
-
   need_eh_cleanup = BITMAP_ALLOC (NULL);
   need_ab_cleanup = BITMAP_ALLOC (NULL);
 
@@ -4761,6 +4760,18 @@  eliminate (bool do_pre)
   el_avail.release ();
   el_avail_stack.release ();
 
+  return el_todo;
+}
+
+/* Perform CFG cleanups made necessary by elimination.  */
+
+static unsigned 
+fini_eliminate (void)
+{
+  gimple_stmt_iterator gsi;
+  gimple *stmt;
+  unsigned todo = 0;
+
   /* We cannot remove stmts during BB walk, especially not release SSA
      names there as this confuses the VN machinery.  The stmts ending
      up in el_to_remove are either stores or simple copies.
@@ -4782,8 +4793,9 @@  eliminate (bool do_pre)
 	lhs = gimple_get_lhs (stmt);
 
       if (inserted_exprs
-	  && TREE_CODE (lhs) == SSA_NAME)
-	bitmap_clear_bit (inserted_exprs, SSA_NAME_VERSION (lhs));
+	  && TREE_CODE (lhs) == SSA_NAME
+	  && bitmap_bit_p (inserted_exprs, SSA_NAME_VERSION (lhs)))
+	continue;
 
       gsi = gsi_for_stmt (stmt);
       if (gimple_code (stmt) == GIMPLE_PHI)
@@ -4800,7 +4812,7 @@  eliminate (bool do_pre)
 	}
 
       /* Removing a stmt may expose a forwarder block.  */
-      el_todo |= TODO_cleanup_cfg;
+      todo |= TODO_cleanup_cfg;
     }
   el_to_remove.release ();
 
@@ -4819,18 +4831,10 @@  eliminate (bool do_pre)
 	}
 
       if (fixup_noreturn_call (stmt))
-	el_todo |= TODO_cleanup_cfg;
+	todo |= TODO_cleanup_cfg;
     }
   el_to_fixup.release ();
 
-  return el_todo;
-}
-
-/* Perform CFG cleanups made necessary by elimination.  */
-
-static unsigned 
-fini_eliminate (void)
-{
   bool do_eh_cleanup = !bitmap_empty_p (need_eh_cleanup);
   bool do_ab_cleanup = !bitmap_empty_p (need_ab_cleanup);
 
@@ -4844,8 +4848,8 @@  fini_eliminate (void)
   BITMAP_FREE (need_ab_cleanup);
 
   if (do_eh_cleanup || do_ab_cleanup)
-    return TODO_cleanup_cfg;
-  return 0;
+    todo |= TODO_cleanup_cfg;
+  return todo;
 }
 
 /* Borrow a bit of tree-ssa-dce.c for the moment.
@@ -5110,8 +5114,8 @@  pass_pre::execute (function *fun)
   remove_dead_inserted_code ();
 
   scev_finalize ();
-  fini_pre ();
   todo |= fini_eliminate ();
+  fini_pre ();
   loop_optimizer_finalize ();
 
   /* Restore SSA info before tail-merging as that resets it as well.  */
Index: gcc/tree-ssa-tail-merge.c
===================================================================
--- gcc/tree-ssa-tail-merge.c	(revision 247831)
+++ gcc/tree-ssa-tail-merge.c	(working copy)
@@ -205,6 +205,7 @@  along with GCC; see the file COPYING3.
 #include "tree-ssa-sccvn.h"
 #include "cfgloop.h"
 #include "tree-eh.h"
+#include "tree-cfgcleanup.h"
 
 /* Describes a group of bbs with the same successors.  The successor bbs are
    cached in succs, and the successor edge flags are cached in succ_flags.
@@ -1717,6 +1718,16 @@  tail_merge_optimize (unsigned int todo)
 
   timevar_push (TV_TREE_TAIL_MERGE);
 
+  /* We enter from PRE which has critical edges split.  Elimination
+     does not process trivially dead code so cleanup the CFG if we
+     are told so.  And re-split critical edges then.  */
+  if (todo & TODO_cleanup_cfg)
+    {
+      cleanup_tree_cfg ();
+      todo &= ~TODO_cleanup_cfg;
+      split_critical_edges ();
+    }
+
   if (!dom_info_available_p (CDI_DOMINATORS))
     {
       /* PRE can leave us with unreachable blocks, remove them now.  */