diff mbox

Optimize n?rotate(x,n):x

Message ID alpine.DEB.2.02.1405012311480.13799@stedding.saclay.inria.fr
State New
Headers show

Commit Message

Marc Glisse May 1, 2014, 9:52 p.m. UTC
Hello,

here is the latest version. Reviewers seemed happy with different 
versions, so I went for the simplest one. We only give up on the 
transformation if we are optimizing for speed, the short-cut has 
probability >50% and the operation the branch is avoiding is expensive 
(i.e. only divisions, with the current estimate_num_insns). The optab 
checks are gone, they should eventually be added to estimate_num_insns 
instead.

I believe this is covered by the previous "ok", but I won't commit 
anything before Tuesday.

It would have been more general (and shorter) to call fold after 
substituting the tested value and see if the result matches the other phi 
argument (it might handle (a!=b)?a|b:a for instance), instead of the 
*_element_p tests. I may try that later, but I guess the current patch is 
good enough for now.


2014-05-06  Marc Glisse  <marc.glisse@inria.fr>

 	PR tree-optimization/59100
gcc/
 	* tree-ssa-phiopt.c: Include tree-inline.h.
 	(neutral_element_p, absorbing_element_p): New functions.
 	(value_replacement): Handle conditional binary operations with a
 	neutral or absorbing element.
gcc/testsuite/
 	* gcc.dg/tree-ssa/phi-opt-12.c: New file.
 	* gcc.dg/tree-ssa/phi-opt-13.c: Likewise.

Comments

Jeff Law May 14, 2014, 6:35 p.m. UTC | #1
On 05/01/14 15:52, Marc Glisse wrote:
> Hello,
>
> here is the latest version. Reviewers seemed happy with different
> versions, so I went for the simplest one. We only give up on the
> transformation if we are optimizing for speed, the short-cut has
> probability >50% and the operation the branch is avoiding is expensive
> (i.e. only divisions, with the current estimate_num_insns). The optab
> checks are gone, they should eventually be added to estimate_num_insns
> instead.
>
> I believe this is covered by the previous "ok", but I won't commit
> anything before Tuesday.
>
> It would have been more general (and shorter) to call fold after
> substituting the tested value and see if the result matches the other
> phi argument (it might handle (a!=b)?a|b:a for instance), instead of the
> *_element_p tests. I may try that later, but I guess the current patch
> is good enough for now.
>
>
> 2014-05-06  Marc Glisse  <marc.glisse@inria.fr>
>
>      PR tree-optimization/59100
> gcc/
>      * tree-ssa-phiopt.c: Include tree-inline.h.
>      (neutral_element_p, absorbing_element_p): New functions.
>      (value_replacement): Handle conditional binary operations with a
>      neutral or absorbing element.
> gcc/testsuite/
>      * gcc.dg/tree-ssa/phi-opt-12.c: New file.
>      * gcc.dg/tree-ssa/phi-opt-13.c: Likewise.
Going with the simpler one is a good default IMHO.  As you note, you can 
always follow-up with refinements if the additional cases turn out to be 
important.

OK for the trunk.

jeff
diff mbox

Patch

Index: gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c	(working copy)
@@ -0,0 +1,23 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-phiopt1" } */
+
+int f(int a, int b, int c) {
+  if (c > 5) return c;
+  if (a == 0) return b;
+  return a + b;
+}
+
+unsigned rot(unsigned x, int n) {
+  const int bits = __CHAR_BIT__ * __SIZEOF_INT__;
+  return (n == 0) ? x : ((x << n) | (x >> (bits - n)));
+}
+
+unsigned m(unsigned a, unsigned b) {
+  if (a == 0)
+    return 0;
+  else
+    return a & b;
+}
+
+/* { dg-final { scan-tree-dump-times "goto" 2 "phiopt1" } } */
+/* { dg-final { cleanup-tree-dump "phiopt1" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c	(working copy)
@@ -0,0 +1,11 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+// Division is expensive
+long f(long a, long b) {
+  if (__builtin_expect(b == 1, 1)) return a;
+  return a / b;
+}
+
+/* { dg-final { scan-tree-dump-times "goto " 2 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
Index: gcc/tree-ssa-phiopt.c
===================================================================
--- gcc/tree-ssa-phiopt.c	(revision 209979)
+++ gcc/tree-ssa-phiopt.c	(working copy)
@@ -47,20 +47,21 @@  along with GCC; see the file COPYING3.
 #include "tree-pass.h"
 #include "langhooks.h"
 #include "domwalk.h"
 #include "cfgloop.h"
 #include "tree-data-ref.h"
 #include "gimple-pretty-print.h"
 #include "insn-config.h"
 #include "expr.h"
 #include "optabs.h"
 #include "tree-scalar-evolution.h"
+#include "tree-inline.h"
 
 #ifndef HAVE_conditional_move
 #define HAVE_conditional_move (0)
 #endif
 
 static unsigned int tree_ssa_phiopt_worker (bool, bool);
 static bool conditional_replacement (basic_block, basic_block,
 				     edge, edge, gimple, tree, tree);
 static int value_replacement (basic_block, basic_block,
 			      edge, edge, gimple, tree, tree);
@@ -652,20 +653,78 @@  operand_equal_for_value_replacement (con
   if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
     return true;
 
   tmp = gimple_assign_rhs2 (def);
   if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
     return true;
 
   return false;
 }
 
+/* Returns true if ARG is a neutral element for operation CODE
+   on the RIGHT side.  */
+
+static bool
+neutral_element_p (tree_code code, tree arg, bool right)
+{
+  switch (code)
+    {
+    case PLUS_EXPR:
+    case BIT_IOR_EXPR:
+    case BIT_XOR_EXPR:
+      return integer_zerop (arg);
+
+    case LROTATE_EXPR:
+    case RROTATE_EXPR:
+    case LSHIFT_EXPR:
+    case RSHIFT_EXPR:
+    case MINUS_EXPR:
+    case POINTER_PLUS_EXPR:
+      return right && integer_zerop (arg);
+
+    case MULT_EXPR:
+      return integer_onep (arg);
+
+    case TRUNC_DIV_EXPR:
+    case CEIL_DIV_EXPR:
+    case FLOOR_DIV_EXPR:
+    case ROUND_DIV_EXPR:
+    case EXACT_DIV_EXPR:
+      return right && integer_onep (arg);
+
+    case BIT_AND_EXPR:
+      return integer_all_onesp (arg);
+
+    default:
+      return false;
+    }
+}
+
+/* Returns true if ARG is an absorbing element for operation CODE.  */
+
+static bool
+absorbing_element_p (tree_code code, tree arg)
+{
+  switch (code)
+    {
+    case BIT_IOR_EXPR:
+      return integer_all_onesp (arg);
+
+    case MULT_EXPR:
+    case BIT_AND_EXPR:
+      return integer_zerop (arg);
+
+    default:
+      return false;
+    }
+}
+
 /*  The function value_replacement does the main work of doing the value
     replacement.  Return non-zero if the replacement is done.  Otherwise return
     0.  If we remove the middle basic block, return 2.
     BB is the basic block where the replacement is going to be done on.  ARG0
     is argument 0 from the PHI.  Likewise for ARG1.  */
 
 static int
 value_replacement (basic_block cond_bb, basic_block middle_bb,
 		   edge e0, edge e1, gimple phi,
 		   tree arg0, tree arg1)
@@ -676,23 +735,21 @@  value_replacement (basic_block cond_bb,
   enum tree_code code;
   bool emtpy_or_with_defined_p = true;
 
   /* If the type says honor signed zeros we cannot do this
      optimization.  */
   if (HONOR_SIGNED_ZEROS (TYPE_MODE (TREE_TYPE (arg1))))
     return 0;
 
   /* If there is a statement in MIDDLE_BB that defines one of the PHI
      arguments, then adjust arg0 or arg1.  */
-  gsi = gsi_after_labels (middle_bb);
-  if (!gsi_end_p (gsi) && is_gimple_debug (gsi_stmt (gsi)))
-    gsi_next_nondebug (&gsi);
+  gsi = gsi_start_nondebug_after_labels_bb (middle_bb);
   while (!gsi_end_p (gsi))
     {
       gimple stmt = gsi_stmt (gsi);
       tree lhs;
       gsi_next_nondebug (&gsi);
       if (!is_gimple_assign (stmt))
 	{
 	  emtpy_or_with_defined_p = false;
 	  continue;
 	}
@@ -774,20 +831,69 @@  value_replacement (basic_block cond_bb,
 	      print_generic_expr (dump_file, gimple_phi_result (phi), 0);
 	      fprintf (dump_file, " reduced for COND_EXPR in block %d to ",
 		       cond_bb->index);
 	      print_generic_expr (dump_file, arg, 0);
 	      fprintf (dump_file, ".\n");
             }
           return 1;
 	}
 
     }
+
+  /* Now optimize (x != 0) ? x + y : y to just y.
+     The following condition is too restrictive, there can easily be another
+     stmt in middle_bb, for instance a CONVERT_EXPR for the second argument.  */
+  gimple assign = last_and_only_stmt (middle_bb);
+  if (!assign || gimple_code (assign) != GIMPLE_ASSIGN
+      || gimple_assign_rhs_class (assign) != GIMPLE_BINARY_RHS
+      || (!INTEGRAL_TYPE_P (TREE_TYPE (arg0))
+	  && !POINTER_TYPE_P (TREE_TYPE (arg0))))
+    return 0;
+
+  /* Size-wise, this is always profitable.  */
+  if (optimize_bb_for_speed_p (cond_bb)
+      /* The special case is useless if it has a low probability.  */
+      && profile_status_for_fn (cfun) != PROFILE_ABSENT
+      && EDGE_PRED (middle_bb, 0)->probability < PROB_EVEN
+      /* If assign is cheap, there is no point avoiding it.  */
+      && estimate_num_insns (assign, &eni_time_weights)
+	 >= 3 * estimate_num_insns (cond, &eni_time_weights))
+    return 0;
+
+  tree lhs = gimple_assign_lhs (assign);
+  tree rhs1 = gimple_assign_rhs1 (assign);
+  tree rhs2 = gimple_assign_rhs2 (assign);
+  enum tree_code code_def = gimple_assign_rhs_code (assign);
+  tree cond_lhs = gimple_cond_lhs (cond);
+  tree cond_rhs = gimple_cond_rhs (cond);
+
+  if (((code == NE_EXPR && e1 == false_edge)
+	|| (code == EQ_EXPR && e1 == true_edge))
+      && arg0 == lhs
+      && ((arg1 == rhs1
+	   && operand_equal_for_phi_arg_p (rhs2, cond_lhs)
+	   && neutral_element_p (code_def, cond_rhs, true))
+	  || (arg1 == rhs2
+	      && operand_equal_for_phi_arg_p (rhs1, cond_lhs)
+	      && neutral_element_p (code_def, cond_rhs, false))
+	  || (operand_equal_for_phi_arg_p (arg1, cond_rhs)
+	      && (operand_equal_for_phi_arg_p (rhs2, cond_lhs)
+		  || operand_equal_for_phi_arg_p (rhs1, cond_lhs))
+	      && absorbing_element_p (code_def, cond_rhs))))
+    {
+      gsi = gsi_for_stmt (cond);
+      gimple_stmt_iterator gsi_from = gsi_for_stmt (assign);
+      gsi_move_before (&gsi_from, &gsi);
+      replace_phi_edge_with_variable (cond_bb, e1, phi, lhs);
+      return 2;
+    }
+
   return 0;
 }
 
 /*  The function minmax_replacement does the main work of doing the minmax
     replacement.  Return true if the replacement is done.  Otherwise return
     false.
     BB is the basic block where the replacement is going to be done on.  ARG0
     is argument 0 from the PHI.  Likewise for ARG1.  */
 
 static bool