Factor out division by squares and remove division around comparisons (2/2)

Submitted by Jackson Woodruff on Aug. 10, 2017, 2:10 p.m.

Details

Message ID d9796ede-3704-d4bf-11c1-00e89eae7a64@foss.arm.com
State New
Headers show

Commit Message

Jackson Woodruff Aug. 10, 2017, 2:10 p.m.
Hi all,

The patch implements the some of the division optimizations discussed in
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71026 .

We now reassociate (as discussed in the bug report):

     x / (y * y) -> x  * (1 / y) * (1 / y)

If it is reasonable to do so. This is done with
-funsafe-math-optimizations.

Bootstrapped and regtested with part (1/2). OK for trunk?

Jackson

gcc/

2017-08-03  Jackson Woodruff  <jackson.woodruff@arm.com>

	PR 71026/tree-optimization
	* tree-ssa-math-opts (is_division_by_square,
	is_square_of, insert_sqaure_reciprocals): New.
	(insert_reciprocals): Change to insert reciprocals
	before a division by a square.
	(execute_cse_reciprocals_1): Change to consider
	division by a square.


gcc/testsuite

2017-08-03  Jackson Woodruff  <jackson.woodruff@arm.com>

	PR 71026/tree-optimization
	* gcc.dg/associate_division_1.c: New.

Comments

Richard Guenther Aug. 15, 2017, 2:07 p.m.
On Thu, Aug 10, 2017 at 4:10 PM, Jackson Woodruff
<jackson.woodruff@foss.arm.com> wrote:
> Hi all,
>
> The patch implements the some of the division optimizations discussed in
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71026 .
>
> We now reassociate (as discussed in the bug report):
>
>     x / (y * y) -> x  * (1 / y) * (1 / y)
>
> If it is reasonable to do so. This is done with
> -funsafe-math-optimizations.
>
> Bootstrapped and regtested with part (1/2). OK for trunk?

I believe your enhancement shows the inherent weakness of
CSE of reciprocals in that it works from the defs.  It will
handle x / (y * y) but not x / (y * y * y).

I think a rewrite of this mini-pass is warranted.

Richard.

> Jackson
>
> gcc/
>
> 2017-08-03  Jackson Woodruff  <jackson.woodruff@arm.com>
>
>         PR 71026/tree-optimization
>         * tree-ssa-math-opts (is_division_by_square,
>         is_square_of, insert_sqaure_reciprocals): New.
>         (insert_reciprocals): Change to insert reciprocals
>         before a division by a square.
>         (execute_cse_reciprocals_1): Change to consider
>         division by a square.
>
>
> gcc/testsuite
>
> 2017-08-03  Jackson Woodruff  <jackson.woodruff@arm.com>
>
>         PR 71026/tree-optimization
>         * gcc.dg/associate_division_1.c: New.
>

Patch hide | download patch | download mbox

diff --git a/gcc/testsuite/gcc.dg/associate_division_1.c b/gcc/testsuite/gcc.dg/associate_division_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..187d3597af8825a6a43f29bfaa68b089d2d5bbfe
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/associate_division_1.c
@@ -0,0 +1,46 @@ 
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-optimized" } */
+
+float
+extract_square (float x, float y, float *a, float *b)
+{
+  *a = 3 / (y * y);
+  *b = 5 / (y * y);
+
+  return x / (y * y);
+}
+
+/* Don't expect the 'powmult' (calculation of y * y)
+   to be deleted until a later pass, so look for one
+   more multiplication than strictly necessary.  */
+float
+extract_recip (float *w, float x, float y, float z)
+{
+  *w = 7 / y;
+
+  return x / (y * y) + z / y;
+}
+
+float
+neg_recip (float x, float y, float z)
+{
+  return (x / y) + (z / -y);
+}
+
+float
+const_divisor (float *w, float x, float y, float z)
+{
+  *w = 5 / y;
+  return x / (y * 3.0f) + z / y;
+}
+
+/* 4 For the pointers to a, b, w and w, 4 multiplications in 'extract_square',
+   5 multiplications in 'extract_recip', 0 multiplications
+   in 'neg_recip', 3 multiplcations in 'const_divisor' expected.  */
+/* { dg-final { scan-tree-dump-times " \\* " 14 "optimized" } } */
+
+/* 1 division in 'extract_square', 1 division in 'extract_recip',
+   1 division in 'neg_recip', 1 division in 'const_divisor'.  */
+/* { dg-final { scan-tree-dump-times " / " 4 "optimized" } } */
+
+
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index 7ac1659fa0670b7080685f3f9513939807073a63..0db160f68001ffb90942c4002703625430128b9f 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -340,6 +340,38 @@  is_division_by (gimple *use_stmt, tree def)
 	 && gimple_assign_rhs1 (use_stmt) != def;
 }
 
+/* Return whether USE_STMT is DEF * DEF.  */
+static inline bool
+is_square_of (gimple *use_stmt, tree def)
+{
+  if (gimple_code (use_stmt) == GIMPLE_ASSIGN
+      && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
+    {
+      tree op0 = gimple_assign_rhs1 (use_stmt);
+      tree op1 = gimple_assign_rhs2 (use_stmt);
+
+      return op0 == op1 && op0 == def;
+    }
+  return 0;
+}
+
+/* Return whether USE_STMT is a floating-point division by
+   DEF * DEF.  */
+static inline bool
+is_division_by_square (gimple *use_stmt, tree def)
+{
+  if (gimple_code (use_stmt) == GIMPLE_ASSIGN
+      && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR)
+    {
+      tree denominator = gimple_assign_rhs2 (use_stmt);
+      if (TREE_CODE (denominator) == SSA_NAME)
+	{
+	  return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
+	}
+    }
+  return 0;
+}
+
 /* Walk the subset of the dominator tree rooted at OCC, setting the
    RECIP_DEF field to a definition of 1.0 / DEF that can be used in
    the given basic block.  The field may be left NULL, of course,
@@ -369,14 +401,16 @@  insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
 				      build_one_cst (type), def);
 
       if (occ->bb_has_division)
-        {
-          /* Case 1: insert before an existing division.  */
-          gsi = gsi_after_labels (occ->bb);
-          while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def))
+	{
+	  /* Case 1: insert before an existing division.  */
+	  gsi = gsi_after_labels (occ->bb);
+	  while (!gsi_end_p (gsi)
+		 && (!is_division_by (gsi_stmt (gsi), def))
+		 && (!is_division_by_square (gsi_stmt (gsi), def)))
 	    gsi_next (&gsi);
 
-          gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
-        }
+	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
+	}
       else if (def_gsi && occ->bb == def_gsi->bb)
         {
           /* Case 2: insert right after the definition.  Note that this will
@@ -403,6 +437,80 @@  insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
 }
 
 
+/* Walk the tree until we find the first division by a square.  Insert
+   the definition of the reciprocal there.  This returns that definition,
+   or if there is an alternate definition earlier, then it returns that
+   instead.  */
+
+static tree
+insert_square_reciprocals (struct occurrence *occ, tree def)
+{
+  gimple_stmt_iterator gsi = gsi_after_labels (occ->bb);
+
+  while (!gsi_end_p (gsi)
+	 && !is_division_by (gsi_stmt (gsi), def)
+	 /* Check to see if a calculation statement has already
+	    been inserted.  */
+	 && !is_square_of (gsi_stmt (gsi), occ->recip_def))
+    gsi_next (&gsi);
+
+  if (gsi_end_p (gsi))
+      return NULL;
+  else if (is_square_of (gsi_stmt (gsi), occ->recip_def))
+      return gimple_assign_lhs (gsi_stmt (gsi));
+  else
+    {
+      tree type = TREE_TYPE (def);
+      tree recip_square_def = create_tmp_reg (type, "powmult_reciptmp");
+      gassign *new_stmt = gimple_build_assign (recip_square_def, MULT_EXPR,
+					       occ->recip_def, occ->recip_def);
+      gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
+      return recip_square_def;
+    }
+}
+
+/* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
+   Unlike replace_reciprocals, we expect use_p to be a square definition
+   (i.e. use_p is _ = x * x).  Iterate though all uses of this square
+   and replace them.  */
+static inline void
+replace_reciprocal_squares (use_operand_p use_p)
+{
+  gimple *use_stmt = USE_STMT (use_p);
+  basic_block bb = gimple_bb (use_stmt);
+  struct occurrence *occ = (struct occurrence *) bb->aux;
+  imm_use_iterator use_iter;
+  tree def_name = gimple_assign_lhs (use_stmt);
+  use_operand_p square_use_p;
+  tree squared_reciprocal = insert_square_reciprocals (occ, def_name);
+
+  if (optimize_bb_for_speed_p (bb) && squared_reciprocal
+      && occ->recip_def && use_stmt != occ->recip_def_stmt)
+    {
+
+      /* Find all occurrences of the use name and replace
+	 them by multiplications of this new inverse.  */
+      FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def_name)
+	{
+	  FOR_EACH_IMM_USE_ON_STMT (square_use_p, use_iter)
+	    {
+	      gimple *square_use = USE_STMT (square_use_p);
+
+	      if (gimple_assign_rhs_code (square_use) == RDIV_EXPR)
+		{
+		  gimple_assign_set_rhs_code (square_use, MULT_EXPR);
+		  gimple_assign_set_rhs2 (square_use, squared_reciprocal);
+		  SET_USE (square_use_p, squared_reciprocal);
+
+		  reciprocal_stats.rdivs_inserted++;
+		  update_stmt (square_use);
+		}
+	    }
+	}
+    }
+}
+
+
 /* Replace the division at USE_P with a multiplication by the reciprocal, if
    possible.  */
 
@@ -460,10 +568,12 @@  free_bb (struct occurrence *occ)
 static void
 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
 {
-  use_operand_p use_p;
-  imm_use_iterator use_iter;
+  use_operand_p use_p, square_use_p;
+  imm_use_iterator use_iter, square_use_iter;
+  tree square_def;
   struct occurrence *occ;
   int count = 0, threshold;
+  int square_recip_count = 0;
 
   gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));
 
@@ -475,11 +585,26 @@  execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
 	  register_division_in (gimple_bb (use_stmt));
 	  count++;
 	}
+
+      if (is_square_of (use_stmt, def))
+	{
+	  square_def = gimple_assign_lhs (use_stmt);
+	  FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
+	    {
+	      gimple *square_use_stmt = USE_STMT (square_use_p);
+	      if (is_division_by (square_use_stmt, square_def))
+		{
+		  register_division_in (gimple_bb (square_use_stmt));
+		  square_recip_count++;
+		}
+	    }
+	}
     }
 
   /* Do the expensive part only if we can hope to optimize something.  */
   threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
-  if (count >= threshold)
+  if (count + square_recip_count >= threshold
+      && count >= 1)
     {
       gimple *use_stmt;
       for (occ = occ_head; occ; occ = occ->next)
@@ -495,6 +620,11 @@  execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
 	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
 		replace_reciprocal (use_p);
 	    }
+	  else if (is_square_of (use_stmt, def))
+	    {
+	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
+		replace_reciprocal_squares (use_p);
+	    }
 	}
     }