diff mbox series

Slightly improve phiopt value_replacement optimization (PR middle-end/62263, PR middle-end/82498)

Message ID 20171013194310.GM14653@tucnak
State New
Headers show
Series Slightly improve phiopt value_replacement optimization (PR middle-end/62263, PR middle-end/82498) | expand

Commit Message

Jakub Jelinek Oct. 13, 2017, 7:43 p.m. UTC
Hi!

First of all, there was a typo in the comment: we are optimizing
(x != 0) ? x + y : y
into x + y, not into y.

And, as the comment mentions, the condition that there is just a single
stmt is too restrictive: in the various patterns posted in the
rotate-related PRs there are cases where, in addition to the last
assign stmt, there are some preparation statements (sometimes a conversion,
sometimes bit masking with a constant, sometimes both).
I think it is undesirable to turn the conditional code into always-executed
code if it is too large, so this patch allows just a few very cheap
preparation statements which feed each other, where it is easily possible to
propagate the cond_rhs constant through those statements to compute what
their result would be (and make sure no undefined behavior is invoked).

With this patch on top of the previous one, on the rotate-8.c testcase
on x86_64-linux and i686-linux we get the most efficient code in all cases
- just a rol/ror instruction, with perhaps some moves into registers needed
to perform it, but without any conditionals, masking, etc.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2017-10-13  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/62263
	PR middle-end/82498
	* tree-ssa-phiopt.c (value_replacement): Comment fix.  Handle
	up to 2 preparation statements for ASSIGN in MIDDLE_BB.

	* c-c++-common/rotate-8.c: Expect no PHIs in optimized dump.


	Jakub

Comments

Richard Biener Oct. 14, 2017, 6:31 p.m. UTC | #1
On October 13, 2017 9:43:10 PM GMT+02:00, Jakub Jelinek <jakub@redhat.com> wrote:
>Hi!
>
>First of all, there was a typo, we are optimizing
>(x != 0) ? x + y : y
>into x + y rather than y.
>
>And, as the comment mentions, the condition that there is just a single
>stmt is too restrictive and in the various patterns posted in the
>various
>rotate related PRs there are cases where in addition to the last
>assign stmt there are some preparation statements (sometimes
>conversion,
>sometimes bit masking with constant, sometimes both).
>I think it is undesirable to turn the conditional code into always
>executed
>one if it is too large, so this patch allows just very few very cheap
>preparation statements which feed each other and it is easily possible
>to
>propagate the cond_rhs constant through those statements to compute
>what
>the result from those would be (and make sure no UB).
>
>With this patch on top of the previous one, on rotate-8.c testcase
>on x86_64-linux and i686-linux we get the most efficient code in all
>cases
>- just a rol/ror instruction with perhaps some moves into registers
>needed
>to perform that, but without any conditionals, masking etc.
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK. 

Richard. 

>2017-10-13  Jakub Jelinek  <jakub@redhat.com>
>
>	PR middle-end/62263
>	PR middle-end/82498
>	* tree-ssa-phiopt.c (value_replacement): Comment fix.  Handle
>	up to 2 preparation statements for ASSIGN in MIDDLE_BB.
>
>	* c-c++-common/rotate-8.c: Expect no PHIs in optimized dump.
>
>--- gcc/tree-ssa-phiopt.c.jj	2017-10-10 22:04:08.000000000 +0200
>+++ gcc/tree-ssa-phiopt.c	2017-10-13 17:55:01.578450763 +0200
>@@ -995,11 +995,13 @@ value_replacement (basic_block cond_bb,
> 
>     }
> 
>-  /* Now optimize (x != 0) ? x + y : y to just y.
>-     The following condition is too restrictive, there can easily be
>another
>-     stmt in middle_bb, for instance a CONVERT_EXPR for the second
>argument.  */
>-  gimple *assign = last_and_only_stmt (middle_bb);
>-  if (!assign || gimple_code (assign) != GIMPLE_ASSIGN
>+  /* Now optimize (x != 0) ? x + y : y to just x + y.  */
>+  gsi = gsi_last_nondebug_bb (middle_bb);
>+  if (gsi_end_p (gsi))
>+    return 0;
>+
>+  gimple *assign = gsi_stmt (gsi);
>+  if (!is_gimple_assign (assign)
>       || gimple_assign_rhs_class (assign) != GIMPLE_BINARY_RHS
>       || (!INTEGRAL_TYPE_P (TREE_TYPE (arg0))
> 	  && !POINTER_TYPE_P (TREE_TYPE (arg0))))
>@@ -1009,6 +1011,71 @@ value_replacement (basic_block cond_bb,
>   if (!gimple_seq_empty_p (phi_nodes (middle_bb)))
>     return 0;
> 
>+  /* Allow up to 2 cheap preparation statements that prepare argument
>+     for assign, e.g.:
>+      if (y_4 != 0)
>+	goto <bb 3>;
>+      else
>+	goto <bb 4>;
>+     <bb 3>:
>+      _1 = (int) y_4;
>+      iftmp.0_6 = x_5(D) r<< _1;
>+     <bb 4>:
>+      # iftmp.0_2 = PHI <iftmp.0_6(3), x_5(D)(2)>
>+     or:
>+      if (y_3(D) == 0)
>+	goto <bb 4>;
>+      else
>+	goto <bb 3>;
>+     <bb 3>:
>+      y_4 = y_3(D) & 31;
>+      _1 = (int) y_4;
>+      _6 = x_5(D) r<< _1;
>+     <bb 4>:
>+      # _2 = PHI <x_5(D)(2), _6(3)>  */
>+  gimple *prep_stmt[2] = { NULL, NULL };
>+  int prep_cnt;
>+  for (prep_cnt = 0; ; prep_cnt++)
>+    {
>+      gsi_prev_nondebug (&gsi);
>+      if (gsi_end_p (gsi))
>+	break;
>+
>+      gimple *g = gsi_stmt (gsi);
>+      if (gimple_code (g) == GIMPLE_LABEL)
>+	break;
>+
>+      if (prep_cnt == 2 || !is_gimple_assign (g))
>+	return 0;
>+
>+      tree lhs = gimple_assign_lhs (g);
>+      tree rhs1 = gimple_assign_rhs1 (g);
>+      use_operand_p use_p;
>+      gimple *use_stmt;
>+      if (TREE_CODE (lhs) != SSA_NAME
>+	  || TREE_CODE (rhs1) != SSA_NAME
>+	  || !INTEGRAL_TYPE_P (TREE_TYPE (lhs))
>+	  || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
>+	  || !single_imm_use (lhs, &use_p, &use_stmt)
>+	  || use_stmt != (prep_cnt ? prep_stmt[prep_cnt - 1] : assign))
>+	return 0;
>+      switch (gimple_assign_rhs_code (g))
>+	{
>+	CASE_CONVERT:
>+	  break;
>+	case PLUS_EXPR:
>+	case BIT_AND_EXPR:
>+	case BIT_IOR_EXPR:
>+	case BIT_XOR_EXPR:
>+	  if (TREE_CODE (gimple_assign_rhs2 (g)) != INTEGER_CST)
>+	    return 0;
>+	  break;
>+	default:
>+	  return 0;
>+	}
>+      prep_stmt[prep_cnt] = g;
>+    }
>+
>   /* Only transform if it removes the condition.  */
>if (!single_non_singleton_phi_for_edges (phi_nodes (gimple_bb (phi)),
>e0, e1))
>     return 0;
>@@ -1019,7 +1086,7 @@ value_replacement (basic_block cond_bb,
>       && profile_status_for_fn (cfun) != PROFILE_ABSENT
>&& EDGE_PRED (middle_bb, 0)->probability < profile_probability::even ()
>       /* If assign is cheap, there is no point avoiding it.  */
>-      && estimate_num_insns (assign, &eni_time_weights)
>+      && estimate_num_insns (bb_seq (middle_bb), &eni_time_weights)
> 	 >= 3 * estimate_num_insns (cond, &eni_time_weights))
>     return 0;
> 
>@@ -1030,6 +1097,32 @@ value_replacement (basic_block cond_bb,
>   tree cond_lhs = gimple_cond_lhs (cond);
>   tree cond_rhs = gimple_cond_rhs (cond);
> 
>+  /* Propagate the cond_rhs constant through preparation stmts,
>+     make sure UB isn't invoked while doing that.  */
>+  for (int i = prep_cnt - 1; i >= 0; --i)
>+    {
>+      gimple *g = prep_stmt[i];
>+      tree grhs1 = gimple_assign_rhs1 (g);
>+      if (!operand_equal_for_phi_arg_p (cond_lhs, grhs1))
>+	return 0;
>+      cond_lhs = gimple_assign_lhs (g);
>+      cond_rhs = fold_convert (TREE_TYPE (grhs1), cond_rhs);
>+      if (TREE_CODE (cond_rhs) != INTEGER_CST
>+	  || TREE_OVERFLOW (cond_rhs))
>+	return 0;
>+      if (gimple_assign_rhs_class (g) == GIMPLE_BINARY_RHS)
>+	{
>+	  cond_rhs = int_const_binop (gimple_assign_rhs_code (g), cond_rhs,
>+				      gimple_assign_rhs2 (g));
>+	  if (TREE_OVERFLOW (cond_rhs))
>+	    return 0;
>+	}
>+      cond_rhs = fold_convert (TREE_TYPE (cond_lhs), cond_rhs);
>+      if (TREE_CODE (cond_rhs) != INTEGER_CST
>+	  || TREE_OVERFLOW (cond_rhs))
>+	return 0;
>+    }
>+
>   if (((code == NE_EXPR && e1 == false_edge)
> 	|| (code == EQ_EXPR && e1 == true_edge))
>       && arg0 == lhs
>@@ -1071,7 +1164,15 @@ value_replacement (basic_block cond_bb,
> 	    duplicate_ssa_name_range_info (lhs, SSA_NAME_RANGE_TYPE (phires),
> 					   phires_range_info);
> 	}
>-      gimple_stmt_iterator gsi_from = gsi_for_stmt (assign);
>+      gimple_stmt_iterator gsi_from;
>+      for (int i = prep_cnt - 1; i >= 0; --i)
>+	{
>+	  tree plhs = gimple_assign_lhs (prep_stmt[i]);
>+	  SSA_NAME_RANGE_INFO (plhs) = NULL;
>+	  gsi_from = gsi_for_stmt (prep_stmt[i]);
>+	  gsi_move_before (&gsi_from, &gsi);
>+	}
>+      gsi_from = gsi_for_stmt (assign);
>       gsi_move_before (&gsi_from, &gsi);
>       replace_phi_edge_with_variable (cond_bb, e1, phi, lhs);
>       return 2;
>--- gcc/testsuite/c-c++-common/rotate-8.c.jj	2017-10-13
>15:55:32.000000000 +0200
>+++ gcc/testsuite/c-c++-common/rotate-8.c	2017-10-13 17:57:32.861627651
>+0200
>@@ -3,6 +3,7 @@
> /* { dg-do compile } */
> /* { dg-options "-O2 -fno-ipa-icf -fdump-tree-optimized" } */
>/* { dg-final { scan-tree-dump-times "r\[<>]\[<>]" 23 "optimized" } }
>*/
>+/* { dg-final { scan-tree-dump-not "PHI <" "optimized" } } */
> 
> unsigned int
> f1 (unsigned int x, unsigned char y)
>
>	Jakub
diff mbox series

Patch

--- gcc/tree-ssa-phiopt.c.jj	2017-10-10 22:04:08.000000000 +0200
+++ gcc/tree-ssa-phiopt.c	2017-10-13 17:55:01.578450763 +0200
@@ -995,11 +995,13 @@  value_replacement (basic_block cond_bb,
 
     }
 
-  /* Now optimize (x != 0) ? x + y : y to just y.
-     The following condition is too restrictive, there can easily be another
-     stmt in middle_bb, for instance a CONVERT_EXPR for the second argument.  */
-  gimple *assign = last_and_only_stmt (middle_bb);
-  if (!assign || gimple_code (assign) != GIMPLE_ASSIGN
+  /* Now optimize (x != 0) ? x + y : y to just x + y.  */
+  gsi = gsi_last_nondebug_bb (middle_bb);
+  if (gsi_end_p (gsi))
+    return 0;
+
+  gimple *assign = gsi_stmt (gsi);
+  if (!is_gimple_assign (assign)
       || gimple_assign_rhs_class (assign) != GIMPLE_BINARY_RHS
       || (!INTEGRAL_TYPE_P (TREE_TYPE (arg0))
 	  && !POINTER_TYPE_P (TREE_TYPE (arg0))))
@@ -1009,6 +1011,71 @@  value_replacement (basic_block cond_bb,
   if (!gimple_seq_empty_p (phi_nodes (middle_bb)))
     return 0;
 
+  /* Allow up to 2 cheap preparation statements that prepare argument
+     for assign, e.g.:
+      if (y_4 != 0)
+	goto <bb 3>;
+      else
+	goto <bb 4>;
+     <bb 3>:
+      _1 = (int) y_4;
+      iftmp.0_6 = x_5(D) r<< _1;
+     <bb 4>:
+      # iftmp.0_2 = PHI <iftmp.0_6(3), x_5(D)(2)>
+     or:
+      if (y_3(D) == 0)
+	goto <bb 4>;
+      else
+	goto <bb 3>;
+     <bb 3>:
+      y_4 = y_3(D) & 31;
+      _1 = (int) y_4;
+      _6 = x_5(D) r<< _1;
+     <bb 4>:
+      # _2 = PHI <x_5(D)(2), _6(3)>  */
+  gimple *prep_stmt[2] = { NULL, NULL };
+  int prep_cnt;
+  for (prep_cnt = 0; ; prep_cnt++)
+    {
+      gsi_prev_nondebug (&gsi);
+      if (gsi_end_p (gsi))
+	break;
+
+      gimple *g = gsi_stmt (gsi);
+      if (gimple_code (g) == GIMPLE_LABEL)
+	break;
+
+      if (prep_cnt == 2 || !is_gimple_assign (g))
+	return 0;
+
+      tree lhs = gimple_assign_lhs (g);
+      tree rhs1 = gimple_assign_rhs1 (g);
+      use_operand_p use_p;
+      gimple *use_stmt;
+      if (TREE_CODE (lhs) != SSA_NAME
+	  || TREE_CODE (rhs1) != SSA_NAME
+	  || !INTEGRAL_TYPE_P (TREE_TYPE (lhs))
+	  || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
+	  || !single_imm_use (lhs, &use_p, &use_stmt)
+	  || use_stmt != (prep_cnt ? prep_stmt[prep_cnt - 1] : assign))
+	return 0;
+      switch (gimple_assign_rhs_code (g))
+	{
+	CASE_CONVERT:
+	  break;
+	case PLUS_EXPR:
+	case BIT_AND_EXPR:
+	case BIT_IOR_EXPR:
+	case BIT_XOR_EXPR:
+	  if (TREE_CODE (gimple_assign_rhs2 (g)) != INTEGER_CST)
+	    return 0;
+	  break;
+	default:
+	  return 0;
+	}
+      prep_stmt[prep_cnt] = g;
+    }
+
   /* Only transform if it removes the condition.  */
   if (!single_non_singleton_phi_for_edges (phi_nodes (gimple_bb (phi)), e0, e1))
     return 0;
@@ -1019,7 +1086,7 @@  value_replacement (basic_block cond_bb,
       && profile_status_for_fn (cfun) != PROFILE_ABSENT
       && EDGE_PRED (middle_bb, 0)->probability < profile_probability::even ()
       /* If assign is cheap, there is no point avoiding it.  */
-      && estimate_num_insns (assign, &eni_time_weights)
+      && estimate_num_insns (bb_seq (middle_bb), &eni_time_weights)
 	 >= 3 * estimate_num_insns (cond, &eni_time_weights))
     return 0;
 
@@ -1030,6 +1097,32 @@  value_replacement (basic_block cond_bb,
   tree cond_lhs = gimple_cond_lhs (cond);
   tree cond_rhs = gimple_cond_rhs (cond);
 
+  /* Propagate the cond_rhs constant through preparation stmts,
+     make sure UB isn't invoked while doing that.  */
+  for (int i = prep_cnt - 1; i >= 0; --i)
+    {
+      gimple *g = prep_stmt[i];
+      tree grhs1 = gimple_assign_rhs1 (g);
+      if (!operand_equal_for_phi_arg_p (cond_lhs, grhs1))
+	return 0;
+      cond_lhs = gimple_assign_lhs (g);
+      cond_rhs = fold_convert (TREE_TYPE (grhs1), cond_rhs);
+      if (TREE_CODE (cond_rhs) != INTEGER_CST
+	  || TREE_OVERFLOW (cond_rhs))
+	return 0;
+      if (gimple_assign_rhs_class (g) == GIMPLE_BINARY_RHS)
+	{
+	  cond_rhs = int_const_binop (gimple_assign_rhs_code (g), cond_rhs,
+				      gimple_assign_rhs2 (g));
+	  if (TREE_OVERFLOW (cond_rhs))
+	    return 0;
+	}
+      cond_rhs = fold_convert (TREE_TYPE (cond_lhs), cond_rhs);
+      if (TREE_CODE (cond_rhs) != INTEGER_CST
+	  || TREE_OVERFLOW (cond_rhs))
+	return 0;
+    }
+
   if (((code == NE_EXPR && e1 == false_edge)
 	|| (code == EQ_EXPR && e1 == true_edge))
       && arg0 == lhs
@@ -1071,7 +1164,15 @@  value_replacement (basic_block cond_bb,
 	    duplicate_ssa_name_range_info (lhs, SSA_NAME_RANGE_TYPE (phires),
 					   phires_range_info);
 	}
-      gimple_stmt_iterator gsi_from = gsi_for_stmt (assign);
+      gimple_stmt_iterator gsi_from;
+      for (int i = prep_cnt - 1; i >= 0; --i)
+	{
+	  tree plhs = gimple_assign_lhs (prep_stmt[i]);
+	  SSA_NAME_RANGE_INFO (plhs) = NULL;
+	  gsi_from = gsi_for_stmt (prep_stmt[i]);
+	  gsi_move_before (&gsi_from, &gsi);
+	}
+      gsi_from = gsi_for_stmt (assign);
       gsi_move_before (&gsi_from, &gsi);
       replace_phi_edge_with_variable (cond_bb, e1, phi, lhs);
       return 2;
--- gcc/testsuite/c-c++-common/rotate-8.c.jj	2017-10-13 15:55:32.000000000 +0200
+++ gcc/testsuite/c-c++-common/rotate-8.c	2017-10-13 17:57:32.861627651 +0200
@@ -3,6 +3,7 @@ 
 /* { dg-do compile } */
 /* { dg-options "-O2 -fno-ipa-icf -fdump-tree-optimized" } */
 /* { dg-final { scan-tree-dump-times "r\[<>]\[<>]" 23 "optimized" } } */
+/* { dg-final { scan-tree-dump-not "PHI <" "optimized" } } */
 
 unsigned int
 f1 (unsigned int x, unsigned char y)