
[RFC,PR70841] Reassoc fails to handle FP division

Message ID 572ABEFF.1070605@linaro.org
State New

Commit Message

Kugan Vivekanandarajah May 5, 2016, 3:33 a.m. UTC
Hi,

I tried to handle the case where reassoc fails to handle FP division 
(PR70841). I think the best way to do this is to follow what is done 
for MINUS_EXPR, i.e., convert the RDIV_EXPR into a MULT_EXPR by (1/x) 
early on, and optimize it later in opt_rdiv_with_multiply.
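
For illustration, this is the kind of input the change is meant to handle 
(it just mirrors the new test case, not additional patch content):

float foo (float x, float y)
{
  /* With -O2 -ffast-math -freciprocal-math the intent is that the
     division is broken up into x * y * (1/x), so after reassociation
     the x and (1/x) operands cancel and only y remains.  */
  return x * y / x;
}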

Here is a patch that passes bootstrap and regression testing on 
x86-64-linux-gnu.

Does this look OK for trunk?

Thanks,
Kugan

gcc/testsuite/ChangeLog:

2016-05-05  Kugan Vivekanandarajah  <kuganv@linaro.org>

	PR middle-end/70841
	* gcc.dg/tree-ssa/pr70841.c: New test.

gcc/ChangeLog:

2016-05-05  Kugan Vivekanandarajah  <kuganv@linaro.org>

	PR middle-end/70841
	* tree-ssa-reassoc.c (should_break_up_rdiv): New.
	(break_up_rdiv): New.
	(break_up_subtract_bb): Rename to break_up_subtract_and_div_bb;
	call should_break_up_rdiv and break_up_rdiv.
	(sort_cmp_int): New.
	(opt_rdiv_with_multiply): New.
	(reassociate_bb): Call opt_rdiv_with_multiply.
	(do_reassoc): Adjust call to break_up_subtract_and_div_bb.

Comments

Richard Biener May 9, 2016, 12:16 p.m. UTC | #1
On Thu, May 5, 2016 at 5:33 AM, kugan <kugan.vivekanandarajah@linaro.org> wrote:
> Hi,
>
> I tried to handle reassoc fails to handle FP division. I think the best way
> to do this is to do like what is done for MINUS_EXPR. I.e, convert the
> RDIV_EXPR to MULT_EXPR by (1/x) early and later in opt_rdiv_with_multiply,
> optimize it.
>
> Here is a patch that passes bootstrap and regression testing on
> x86-64-linux-gnu.
>
> Does this look Ok for trunk?

Implementation-wise I'd rather do opt_rdiv_with_multiply from
optimize_ops_list, like eliminate_plus_minus_pair is implemented.

But then in general I am concerned about not doing something like
repropagate_negates.

That is, this patch will turn a / b into a * (1/b), and I think with
just -freciprocal-math nothing will turn it back into a / b.  We may
also do something like a * b / c -> a * (1/c) * b -> (a / c) * b, which
is generally undesirable if we are not able to cancel something out,
because of the change in the FP outcome this generally causes.
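
For example (purely illustrative, values picked by hand), the two
association orders need not round the same way:

#include <stdio.h>

int main (void)
{
  float a = 2.0e-38f, b = 3.0f, c = 7.0f;
  /* a * b is still a normal float here, then the division rounds.  */
  printf ("%a\n", a * b / c);
  /* a / c falls into the subnormal range and loses bits before the
     multiplication.  */
  printf ("%a\n", (a / c) * b);
  return 0;
}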

So in the end while my suggestion in the PR "works" it may not be something
we want to implement.  Eventually we want to treat divisions as association
barriers unless we can make them cancel out.

I also don't particularly like the way reassoc handles minus/negates
by re-writing the GIMPLE IL.  I'd very much rather have an op->negate
flag (or op->recip for reciprocals).
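
A purely hypothetical sketch of what that could look like (not the
existing operand_entry layout, just the shape of the idea):

/* Hypothetical: record how the operand participates instead of
   rewriting the IL up front.  */
struct operand_entry
{
  unsigned int rank;
  unsigned int id;
  tree op;
  unsigned int count;
  bool negate;	/* Operand participates as -op.  */
  bool recip;	/* Operand participates as 1/op (FP only).  */
};

a / b would then be entered as { a, b [recip] } and turned back into a
division (or cancelled against a plain b operand) only when the final
statement sequence is rebuilt.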

With -freciprocal-math we made sure (back in time) that SPEC 2006
doesn't miscompare when using it, so I'd like to verify that is still
the case when doing something like what this patch does.

Thanks,
Richard.

> Thanks,
> Kugan
>
> gcc/testsuite/ChangeLog:
>
> 2016-05-05  Kugan Vivekanandarajah  <kuganv@linaro.org>
>
>         PR middle-end/70841
>         * gcc.dg/tree-ssa/pr70841.c: New test.
>
> gcc/ChangeLog:
>
> 2016-05-05  Kugan Vivekanandarajah  <kuganv@linaro.org>
>
>         PR middle-end/70841
>         * tree-ssa-reassoc.c (should_break_up_rdiv): New.
>         (break_up_rdiv): New
>         (break_up_subtract_bb): Call should_break_up_rdiv and break_up_rdiv.
>         (do_reassoc): Rename break_up_subtract_bb to
> break_up_subtract_and_div_bb.
>         (sort_cmp_int): New.
>         (opt_rdiv_with_multiply): New.
>         (reassociate_bb): Call opt_rdiv_with_multiply.
>         (do_reassoc): Renamed called function break_up_subtract_bb to
>         break_up_subtract_and_div_bb.
diff mbox

Patch

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr70841.c b/gcc/testsuite/gcc.dg/tree-ssa/pr70841.c
index e69de29..0b456aa 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr70841.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr70841.c
@@ -0,0 +1,15 @@ 
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -freciprocal-math -fdump-tree-optimized" } */
+
+float foo (float x, float y)
+{
+    return x * y / x;
+}
+
+float foo2 (float x, float y)
+{
+    return (y / x) * x ;
+}
+
+/* { dg-final { scan-tree-dump-times "return y_" 2 "optimized" } } */
diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c
index d23dabd..29a5422 100644
--- a/gcc/tree-ssa-reassoc.c
+++ b/gcc/tree-ssa-reassoc.c
@@ -4168,6 +4168,40 @@  should_break_up_subtract (gimple *stmt)
   return false;
 }
 
+/* Return true if we should break up the RDIV in STMT into a MULT_EXPR
+   with the reciprocal.  */
+
+static bool
+should_break_up_rdiv (gimple *stmt)
+{
+  tree lhs = gimple_assign_lhs (stmt);
+  tree binlhs = gimple_assign_rhs1 (stmt);
+  tree binrhs = gimple_assign_rhs2 (stmt);
+  gimple *immusestmt;
+
+  if (VECTOR_TYPE_P (TREE_TYPE (lhs)))
+    return false;
+
+  if (TREE_CODE (lhs) == SSA_NAME
+      && (immusestmt = get_single_immediate_use (lhs))
+      && is_gimple_assign (immusestmt)
+      && gimple_assign_rhs_code (immusestmt) == MULT_EXPR)
+    return true;
+  if (TREE_CODE (binlhs) == SSA_NAME
+      && (immusestmt = SSA_NAME_DEF_STMT (binlhs))
+      && get_single_immediate_use (binlhs)
+      && is_gimple_assign (immusestmt)
+      && gimple_assign_rhs_code (immusestmt) == MULT_EXPR)
+    return true;
+  if (TREE_CODE (binrhs) == SSA_NAME
+      && (immusestmt = SSA_NAME_DEF_STMT (binrhs))
+      && get_single_immediate_use (binrhs)
+      && is_gimple_assign (immusestmt)
+      && gimple_assign_rhs_code (immusestmt) == MULT_EXPR)
+    return true;
+  return false;
+}
+
 /* Transform STMT from A - B into A + -B.  */
 
 static void
@@ -4187,6 +4221,23 @@  break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
   update_stmt (stmt);
 }
 
+/* Transform STMT from A / B into A * (1/B).  */
+static void
+break_up_rdiv (gimple *stmt, gimple_stmt_iterator *gsip)
+{
+  tree rhs1 = gimple_assign_rhs1 (stmt);
+  tree rhs2 = gimple_assign_rhs2 (stmt);
+  tree tmp = make_ssa_name (TREE_TYPE (rhs1));
+  tree one = fold_convert (TREE_TYPE (rhs1),
+			   build_int_cst (integer_type_node, 1));
+  gassign *div_stmt = gimple_build_assign (tmp, RDIV_EXPR, one, rhs2);
+  gimple_set_uid (div_stmt, gimple_uid (stmt));
+  gsi_insert_before (gsip, div_stmt, GSI_NEW_STMT);
+  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+  gimple_assign_set_rhs_with_ops (&gsi, MULT_EXPR, rhs1, tmp);
+  update_stmt (stmt);
+}
+
 /* Determine whether STMT is a builtin call that raises an SSA name
    to an integer power and has only one use.  If so, and this is early
    reassociation and unsafe math optimizations are permitted, place
@@ -4492,7 +4543,7 @@  can_reassociate_p (tree op)
    and set UIDs within each basic block.  */
 
 static void
-break_up_subtract_bb (basic_block bb)
+break_up_subtract_and_div_bb (basic_block bb)
 {
   gimple_stmt_iterator gsi;
   basic_block son;
@@ -4522,6 +4573,15 @@  break_up_subtract_bb (basic_block bb)
 	  if (should_break_up_subtract (stmt))
 	    break_up_subtract (stmt, &gsi);
 	}
+      else if (flag_reciprocal_math
+	  && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
+	{
+	  if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
+	      || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
+	    continue;
+	  if (should_break_up_rdiv (stmt))
+	    break_up_rdiv (stmt, &gsi);
+	}
       else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
 	       && can_reassociate_p (gimple_assign_rhs1 (stmt)))
 	plus_negates.safe_push (gimple_assign_lhs (stmt));
@@ -4529,7 +4589,7 @@  break_up_subtract_bb (basic_block bb)
   for (son = first_dom_son (CDI_DOMINATORS, bb);
        son;
        son = next_dom_son (CDI_DOMINATORS, son))
-    break_up_subtract_bb (son);
+    break_up_subtract_and_div_bb (son);
 }
 
 /* Used for repeated factor analysis.  */
@@ -5015,6 +5075,67 @@  transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
     }
 }
 
+/* Helper to sort ints in descending order.  */
+static int
+sort_cmp_int (const void *pa, const void *pb)
+{
+  const int a = *(const int *)pa;
+  const int b = *(const int *)pb;
+  return b - a;
+}
+
+/* For -freciprocal-math, optimize MULT_EXPR of (x) and (1/x).  */
+
+static bool
+opt_rdiv_with_multiply (vec<operand_entry *> *ops)
+{
+  bool changed = false;
+  vec<int> indxs = vNULL;
+  /* FIXME Since we do O(n^2) comparisons, skip this optimization if we have
+     too many operands.  This is going to be very rare.  */
+  if (ops->length () > 10)
+    return false;
+
+  for (unsigned int i = 0; i < ops->length (); ++i)
+    {
+      tree op = (*ops)[i]->op;
+      if (TREE_CODE (op) != SSA_NAME
+	  || !is_gimple_assign (SSA_NAME_DEF_STMT (op)))
+	continue;
+
+      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
+      enum tree_code rhs_code = gimple_assign_rhs_code (def_stmt);
+      tree rhs1 = gimple_assign_rhs1 (def_stmt);
+      tree rhs2 = gimple_assign_rhs2 (def_stmt);
+
+      if (rhs_code == RDIV_EXPR
+	  && TREE_CODE (rhs1) == REAL_CST
+	  && real_equal (&TREE_REAL_CST (rhs1), &dconst1))
+	{
+	  for (unsigned int j = 0; j < ops->length (); ++j)
+	    {
+	      tree op = (*ops)[j]->op;
+	      if (op == rhs2)
+		{
+		  indxs.safe_push (i);
+		  indxs.safe_push (j);
+		}
+	    }
+	}
+    }
+
+  if (indxs.length () > 0)
+    {
+      changed = true;
+      /* Sort the indices in descending order and remove from the back.  */
+      indxs.qsort (sort_cmp_int);
+      for (unsigned int i = 0; i < indxs.length () ; ++i)
+	ops->unordered_remove (indxs[i]);
+    }
+
+  return changed;
+}
+
 /* Reassociate expressions in basic block BB and its post-dominator as
    children.
 
@@ -5110,6 +5231,11 @@  reassociate_bb (basic_block bb)
 		  optimize_ops_list (rhs_code, &ops);
 		}
 
+	      if (rhs_code == MULT_EXPR
+		  && flag_reciprocal_math
+		  && opt_rdiv_with_multiply (&ops))
+		ops.qsort (sort_by_operand_rank);
+
 	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
 		{
 		  if (is_vector)
@@ -5308,7 +5434,7 @@  debug_ops_vector (vec<operand_entry *> ops)
 static bool
 do_reassoc (void)
 {
-  break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
+  break_up_subtract_and_div_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
   return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
 }