diff mbox

[RFA,PR,tree-optimization/45685]

Message ID 52A8CA45.8090108@redhat.com
State New
Headers show

Commit Message

Jeff Law Dec. 11, 2013, 8:25 p.m. UTC
On 12/11/13 02:51, Richard Biener wrote:
>
> That looks wrong - you want to look at HONOR_NANS on the mode
> of one of the comparison operands, not of the actual value you want
> to negate (it's integer and thus never has NaNs).
>
> The rest of the patch looks ok to me.
Here's the updated version.  It addresses the two issues you raised. 
Specifically it adds the hairy condition to avoid this code in cases 
where it's not likely to be useful and it fixes the call to 
invert_tree_comparison.

Bootstrapped and regression tested on x86_64-unknown-linux-gnu
OK for the trunk?

jeff
PR tree-optimization/45685
	* tree-ssa-phiopt.c (neg_replacement): New function.
	(tree_ssa_phiopt_worker): Call it.
	
	PR tree-optimization/45685
	* gcc.dg/tree-ssa/pr45685.c: New test.

Comments

Richard Biener Dec. 13, 2013, 9:18 a.m. UTC | #1
On Wed, Dec 11, 2013 at 9:25 PM, Jeff Law <law@redhat.com> wrote:
> On 12/11/13 02:51, Richard Biener wrote:
>>
>>
>> That looks wrong - you want to look at HONOR_NANS on the mode
>> of one of the comparison operands, not of the actual value you want
>> to negate (it's integer and thus never has NaNs).
>>
>> The rest of the patch looks ok to me.
>
> Here's the updated version.  It addresses the two issues you raised.
> Specifically it adds the hairy condition to avoid this code in cases where
> it's not likely to be useful and it fixes the call to
> invert_tree_comparison.
>
>
> Bootstrapped and regression tested on x86_64-unknown-linux-gnu
> OK for the trunk?

Ok.

Thanks,
Richard.

> jeff
>
>
>         PR tree-optimization/45685
>         * tree-ssa-phiopt.c (neg_replacement): New function.
>         (tree_ssa_phiopt_worker): Call it.
>
>         PR tree-optimization/45685
>         * gcc.dg/tree-ssa/pr45685.c: New test.
>
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr45685.c
> b/gcc/testsuite/gcc.dg/tree-ssa/pr45685.c
> new file mode 100644
> index 0000000..0628943
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr45685.c
> @@ -0,0 +1,41 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -fdump-tree-phiopt1-details" } */
> +
> +typedef unsigned long int uint64_t;
> +typedef long int int64_t;
> +int summation_helper_1(int64_t* products, uint64_t count)
> +{
> +       int s = 0;
> +       uint64_t i;
> +       for(i=0; i<count; i++)
> +       {
> +               int64_t val = (products[i]>0) ? 1 : -1;
> +               products[i] *= val;
> +               if(products[i] != i)
> +                       val = -val;
> +               products[i] = val;
> +               s += val;
> +       }
> +       return s;
> +}
> +
> +
> +int summation_helper_2(int64_t* products, uint64_t count)
> +{
> +       int s = 0;
> +       uint64_t i;
> +       for(i=0; i<count; i++)
> +       {
> +               int val = (products[i]>0) ? 1 : -1;
> +               products[i] *= val;
> +               if(products[i] != i)
> +                       val = -val;
> +               products[i] = val;
> +               s += val;
> +       }
> +       return s;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "converted to straightline code" 2
> "phiopt1" } } */
> +/* { dg-final { cleanup-tree-dump "phiopt1" } } */
> +
> diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
> index 11e565f..96154fb 100644
> --- a/gcc/tree-ssa-phiopt.c
> +++ b/gcc/tree-ssa-phiopt.c
> @@ -69,6 +69,8 @@ static bool minmax_replacement (basic_block, basic_block,
>                                 edge, edge, gimple, tree, tree);
>  static bool abs_replacement (basic_block, basic_block,
>                              edge, edge, gimple, tree, tree);
> +static bool neg_replacement (basic_block, basic_block,
> +                            edge, edge, gimple, tree, tree);
>  static bool cond_store_replacement (basic_block, basic_block, edge, edge,
>                                     struct pointer_set_t *);
>  static bool cond_if_else_store_replacement (basic_block, basic_block,
> basic_block);
> @@ -336,6 +338,23 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool
> do_hoist_loads)
>      /* Calculate the set of non-trapping memory accesses.  */
>      nontrap = get_non_trapping ();
>
> +  /* The replacement of conditional negation with a non-branching
> +     sequence is really only a win when optimizing for speed and we
> +     can avoid transformations by gimple if-conversion that result
> +     in poor RTL generation.
> +
> +     Ideally either gimple if-conversion or the RTL expanders will
> +     be improved and the code to emit branchless conditional negation
> +     can be removed.  */
> +  bool replace_conditional_negation = false;
> +  if (!do_store_elim)
> +    replace_conditional_negation
> +      = ((!optimize_size && optimize >= 2)
> +        || (((flag_tree_loop_vectorize || cfun->has_force_vect_loops)
> +             && flag_tree_loop_if_convert != 0)
> +            || flag_tree_loop_if_convert == 1
> +            || flag_tree_loop_if_convert_stores == 1));
> +
>    /* Search every basic block for COND_EXPR we may be able to optimize.
>
>       We walk the blocks in order that guarantees that a block with
> @@ -489,6 +508,9 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool
> do_hoist_loads)
>             cfgchanged = true;
>           else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
>             cfgchanged = true;
> +         else if (replace_conditional_negation
> +                  && neg_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
> +           cfgchanged = true;
>           else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
>             cfgchanged = true;
>         }
> @@ -1285,6 +1307,143 @@ abs_replacement (basic_block cond_bb, basic_block
> middle_bb,
>    return true;
>  }
>
> +/*  The function neg_replacement replaces conditional negation with
> +    equivalent straight line code.  Returns TRUE if replacement is done,
> +    otherwise returns FALSE.
> +
> +    COND_BB branches around negation occuring in MIDDLE_BB.
> +
> +    E0 and E1 are edges out of COND_BB.  E0 reaches MIDDLE_BB and
> +    E1 reaches the other successor which should contain PHI with
> +    arguments ARG0 and ARG1.
> +
> +    Assuming negation is to occur when the condition is true,
> +    then the non-branching sequence is:
> +
> +       result = (rhs ^ -cond) + cond
> +
> +    Inverting the condition or its result gives us negation
> +    when the original condition is false.  */
> +
> +static bool
> +neg_replacement (basic_block cond_bb, basic_block middle_bb,
> +                edge e0 ATTRIBUTE_UNUSED, edge e1,
> +                gimple phi, tree arg0, tree arg1)
> +{
> +  gimple new_stmt, cond;
> +  gimple_stmt_iterator gsi;
> +  gimple assign;
> +  edge true_edge, false_edge;
> +  tree rhs, lhs;
> +  enum tree_code cond_code;
> +  bool invert = false;
> +
> +  /* This transformation performs logical operations on the
> +     incoming arguments.  So force them to be integral types.   */
> +  if (!INTEGRAL_TYPE_P (TREE_TYPE (arg0)))
> +    return false;
> +
> +  /* OTHER_BLOCK must have only one executable statement which must have
> the
> +     form arg0 = -arg1 or arg1 = -arg0.  */
> +
> +  assign = last_and_only_stmt (middle_bb);
> +  /* If we did not find the proper negation assignment, then we can not
> +     optimize.  */
> +  if (assign == NULL)
> +    return false;
> +
> +  /* If we got here, then we have found the only executable statement
> +     in OTHER_BLOCK.  If it is anything other than arg0 = -arg1 or
> +     arg1 = -arg0, then we can not optimize.  */
> +  if (gimple_code (assign) != GIMPLE_ASSIGN)
> +    return false;
> +
> +  lhs = gimple_assign_lhs (assign);
> +
> +  if (gimple_assign_rhs_code (assign) != NEGATE_EXPR)
> +    return false;
> +
> +  rhs = gimple_assign_rhs1 (assign);
> +
> +  /* The assignment has to be arg0 = -arg1 or arg1 = -arg0.  */
> +  if (!(lhs == arg0 && rhs == arg1)
> +      && !(lhs == arg1 && rhs == arg0))
> +    return false;
> +
> +  /* The basic sequence assumes we negate when the condition is true.
> +     If we need the opposite, then we will either need to invert the
> +     condition or its result.  */
> +  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
> +  invert = false_edge->dest == middle_bb;
> +
> +  /* Unlike abs_replacement, we can handle arbitrary conditionals here.  */
> +  cond = last_stmt (cond_bb);
> +  cond_code = gimple_cond_code (cond);
> +
> +  /* If inversion is needed, first try to invert the test since
> +     that's cheapest.  */
> +  if (invert)
> +    {
> +      bool honor_nans
> +       = HONOR_NANS (TYPE_MODE (TREE_TYPE (gimple_cond_lhs (cond))));
> +      enum tree_code new_code = invert_tree_comparison (cond_code,
> honor_nans);
> +
> +      /* If invert_tree_comparison was successful, then use its return
> +        value as the new code and note that inversion is no longer
> +        needed.  */
> +      if (new_code != ERROR_MARK)
> +       {
> +         cond_code = new_code;
> +         invert = false;
> +       }
> +    }
> +
> +  tree cond_val = make_ssa_name (boolean_type_node, NULL);
> +  new_stmt = gimple_build_assign_with_ops (cond_code, cond_val,
> +                                          gimple_cond_lhs (cond),
> +                                          gimple_cond_rhs (cond));
> +  gsi = gsi_last_bb (cond_bb);
> +  gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
> +
> +  /* If we still need inversion, then invert the result of the
> +     condition.  */
> +  if (invert)
> +    {
> +      tree tmp = make_ssa_name (boolean_type_node, NULL);
> +      new_stmt = gimple_build_assign_with_ops (BIT_XOR_EXPR, tmp,
> +                                              cond_val, boolean_true_node);
> +      gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
> +      cond_val = tmp;
> +    }
> +
> +  /* Get the condition in the right type so that we can perform
> +     logical and arithmetic operations on it.  */
> +  tree cond_val_converted = make_ssa_name (TREE_TYPE (rhs), NULL);
> +  new_stmt = gimple_build_assign_with_ops (NOP_EXPR, cond_val_converted,
> +                                          cond_val, NULL_TREE);
> +  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
> +
> +  tree neg_cond_val_converted = make_ssa_name (TREE_TYPE (rhs), NULL);
> +  new_stmt = gimple_build_assign_with_ops (NEGATE_EXPR,
> neg_cond_val_converted,
> +                                          cond_val_converted, NULL_TREE);
> +  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
> +
> +  tree tmp = make_ssa_name (TREE_TYPE (rhs), NULL);
> +  new_stmt = gimple_build_assign_with_ops (BIT_XOR_EXPR, tmp,
> +                                          rhs, neg_cond_val_converted);
> +  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
> +
> +  tree new_lhs = make_ssa_name (TREE_TYPE (rhs), NULL);
> +  new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, new_lhs,
> +                                          tmp, cond_val_converted);
> +  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
> +
> +  replace_phi_edge_with_variable (cond_bb, e1, phi, new_lhs);
> +
> +  /* Note that we optimized this PHI.  */
> +  return true;
> +}
> +
>  /* Auxiliary functions to determine the set of memory accesses which
>     can't trap because they are preceded by accesses to the same memory
>     portion.  We do that for MEM_REFs, so we only need to track
>
diff mbox

Patch

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr45685.c b/gcc/testsuite/gcc.dg/tree-ssa/pr45685.c
new file mode 100644
index 0000000..0628943
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr45685.c
@@ -0,0 +1,41 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-phiopt1-details" } */
+
+typedef unsigned long int uint64_t;
+typedef long int int64_t;
+int summation_helper_1(int64_t* products, uint64_t count)
+{
+	int s = 0;
+	uint64_t i;
+	for(i=0; i<count; i++)
+	{	
+		int64_t val = (products[i]>0) ? 1 : -1;
+		products[i] *= val;
+		if(products[i] != i)
+			val = -val;
+		products[i] = val;
+		s += val;
+	}
+	return s;
+}
+
+
+int summation_helper_2(int64_t* products, uint64_t count)
+{
+	int s = 0;
+	uint64_t i;
+	for(i=0; i<count; i++)
+	{	
+		int val = (products[i]>0) ? 1 : -1;
+		products[i] *= val;
+		if(products[i] != i)
+			val = -val;
+		products[i] = val;
+		s += val;
+	}
+	return s;
+}
+
+/* { dg-final { scan-tree-dump-times "converted to straightline code" 2 "phiopt1" } } */
+/* { dg-final { cleanup-tree-dump "phiopt1" } } */
+
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 11e565f..96154fb 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -69,6 +69,8 @@  static bool minmax_replacement (basic_block, basic_block,
 				edge, edge, gimple, tree, tree);
 static bool abs_replacement (basic_block, basic_block,
 			     edge, edge, gimple, tree, tree);
+static bool neg_replacement (basic_block, basic_block,
+			     edge, edge, gimple, tree, tree);
 static bool cond_store_replacement (basic_block, basic_block, edge, edge,
 				    struct pointer_set_t *);
 static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
@@ -336,6 +338,23 @@  tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads)
     /* Calculate the set of non-trapping memory accesses.  */
     nontrap = get_non_trapping ();
 
+  /* The replacement of conditional negation with a non-branching
+     sequence is really only a win when optimizing for speed and we
+     can avoid transformations by gimple if-conversion that result
+     in poor RTL generation.
+
+     Ideally either gimple if-conversion or the RTL expanders will
+     be improved and the code to emit branchless conditional negation
+     can be removed.  */
+  bool replace_conditional_negation = false;
+  if (!do_store_elim)
+    replace_conditional_negation
+      = ((!optimize_size && optimize >= 2)
+	 || (((flag_tree_loop_vectorize || cfun->has_force_vect_loops)
+	      && flag_tree_loop_if_convert != 0)
+	     || flag_tree_loop_if_convert == 1
+	     || flag_tree_loop_if_convert_stores == 1));
+
   /* Search every basic block for COND_EXPR we may be able to optimize.
 
      We walk the blocks in order that guarantees that a block with
@@ -489,6 +508,9 @@  tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads)
 	    cfgchanged = true;
 	  else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
 	    cfgchanged = true;
+	  else if (replace_conditional_negation
+		   && neg_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
+	    cfgchanged = true;
 	  else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
 	    cfgchanged = true;
 	}
@@ -1285,6 +1307,143 @@  abs_replacement (basic_block cond_bb, basic_block middle_bb,
   return true;
 }
 
+/*  The function neg_replacement replaces conditional negation with
+    equivalent straight line code.  Returns TRUE if replacement is done,
+    otherwise returns FALSE.
+
+    COND_BB branches around negation occuring in MIDDLE_BB.
+
+    E0 and E1 are edges out of COND_BB.  E0 reaches MIDDLE_BB and
+    E1 reaches the other successor which should contain PHI with
+    arguments ARG0 and ARG1.
+
+    Assuming negation is to occur when the condition is true,
+    then the non-branching sequence is:
+
+       result = (rhs ^ -cond) + cond
+
+    Inverting the condition or its result gives us negation
+    when the original condition is false.  */
+
+static bool
+neg_replacement (basic_block cond_bb, basic_block middle_bb,
+		 edge e0 ATTRIBUTE_UNUSED, edge e1,
+		 gimple phi, tree arg0, tree arg1)
+{
+  gimple new_stmt, cond;
+  gimple_stmt_iterator gsi;
+  gimple assign;
+  edge true_edge, false_edge;
+  tree rhs, lhs;
+  enum tree_code cond_code;
+  bool invert = false;
+
+  /* This transformation performs logical operations on the
+     incoming arguments.  So force them to be integral types.   */
+  if (!INTEGRAL_TYPE_P (TREE_TYPE (arg0)))
+    return false;
+
+  /* OTHER_BLOCK must have only one executable statement which must have the
+     form arg0 = -arg1 or arg1 = -arg0.  */
+
+  assign = last_and_only_stmt (middle_bb);
+  /* If we did not find the proper negation assignment, then we can not
+     optimize.  */
+  if (assign == NULL)
+    return false;
+
+  /* If we got here, then we have found the only executable statement
+     in OTHER_BLOCK.  If it is anything other than arg0 = -arg1 or
+     arg1 = -arg0, then we can not optimize.  */
+  if (gimple_code (assign) != GIMPLE_ASSIGN)
+    return false;
+
+  lhs = gimple_assign_lhs (assign);
+
+  if (gimple_assign_rhs_code (assign) != NEGATE_EXPR)
+    return false;
+
+  rhs = gimple_assign_rhs1 (assign);
+
+  /* The assignment has to be arg0 = -arg1 or arg1 = -arg0.  */
+  if (!(lhs == arg0 && rhs == arg1)
+      && !(lhs == arg1 && rhs == arg0))
+    return false;
+
+  /* The basic sequence assumes we negate when the condition is true.
+     If we need the opposite, then we will either need to invert the
+     condition or its result.  */
+  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
+  invert = false_edge->dest == middle_bb;
+
+  /* Unlike abs_replacement, we can handle arbitrary conditionals here.  */
+  cond = last_stmt (cond_bb);
+  cond_code = gimple_cond_code (cond);
+
+  /* If inversion is needed, first try to invert the test since
+     that's cheapest.  */
+  if (invert)
+    {
+      bool honor_nans
+	= HONOR_NANS (TYPE_MODE (TREE_TYPE (gimple_cond_lhs (cond))));
+      enum tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
+
+      /* If invert_tree_comparison was successful, then use its return
+	 value as the new code and note that inversion is no longer
+	 needed.  */
+      if (new_code != ERROR_MARK)
+	{
+	  cond_code = new_code;
+	  invert = false;
+	}
+    }
+
+  tree cond_val = make_ssa_name (boolean_type_node, NULL);
+  new_stmt = gimple_build_assign_with_ops (cond_code, cond_val,
+					   gimple_cond_lhs (cond),
+					   gimple_cond_rhs (cond));
+  gsi = gsi_last_bb (cond_bb);
+  gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
+
+  /* If we still need inversion, then invert the result of the
+     condition.  */
+  if (invert)
+    {
+      tree tmp = make_ssa_name (boolean_type_node, NULL);
+      new_stmt = gimple_build_assign_with_ops (BIT_XOR_EXPR, tmp,
+					       cond_val, boolean_true_node);
+      gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
+      cond_val = tmp;
+    }
+
+  /* Get the condition in the right type so that we can perform
+     logical and arithmetic operations on it.  */
+  tree cond_val_converted = make_ssa_name (TREE_TYPE (rhs), NULL);
+  new_stmt = gimple_build_assign_with_ops (NOP_EXPR, cond_val_converted,
+					   cond_val, NULL_TREE);
+  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
+
+  tree neg_cond_val_converted = make_ssa_name (TREE_TYPE (rhs), NULL);
+  new_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, neg_cond_val_converted,
+					   cond_val_converted, NULL_TREE);
+  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
+
+  tree tmp = make_ssa_name (TREE_TYPE (rhs), NULL);
+  new_stmt = gimple_build_assign_with_ops (BIT_XOR_EXPR, tmp,
+					   rhs, neg_cond_val_converted);
+  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
+
+  tree new_lhs = make_ssa_name (TREE_TYPE (rhs), NULL);
+  new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, new_lhs,
+					   tmp, cond_val_converted);
+  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
+
+  replace_phi_edge_with_variable (cond_bb, e1, phi, new_lhs);
+
+  /* Note that we optimized this PHI.  */
+  return true;
+}
+
 /* Auxiliary functions to determine the set of memory accesses which
    can't trap because they are preceded by accesses to the same memory
    portion.  We do that for MEM_REFs, so we only need to track