Patchwork [tree-optimization] : 4 of 6 Improve reassoc for bitwise operations

login
register
mail settings
Submitter Kai Tietz
Date Oct. 7, 2011, 4:39 p.m.
Message ID <CAEwic4bbMr4PhHnx_ox4Jd7N=A2pMPm2OenaOLGaOWyPZ7PKtA@mail.gmail.com>
Download mbox | patch
Permalink /patch/118353/
State New
Headers show

Comments

Kai Tietz - Oct. 7, 2011, 4:39 p.m.
Hello,

This patch extends the repropagation pass for bitwise expressions with the
conversions (X != 0) | (Y != 0) -> (X | Y) != 0 and (X == 0) & (Y == 0)
-> (X | Y) == 0.

ChangeLog

2011-10-07  Kai Tietz  <ktietz@redhat.com>

	* tree-ssa-reassoc.c (walk_bitwise_stmt_elems): Add new argument
	to store compare to zero elements.
	(merge_bitwise_compares): New helper to reconstruct the merged
	comparison block from the vector by grouping.
	(operate_bitwise_stmt): Use merge_bitwise_compares.

2011-10-07  Kai Tietz  <ktietz@redhat.com>

	* gcc.dg/tree-ssa/reassoc-repro_cmpior-1.c: New file.
	* gcc.dg/tree-ssa/reassoc-repro_cmpior-2.c: New file.
	* gcc.dg/tree-ssa/reassoc-repro_cmpior-3.c: New file.

Bootstrapped and regression-tested for all languages plus Ada and
Obj-C++ on x86_64-pc-linux-gnu.
OK to apply?

Regards,
Kai

Patch

Index: gcc/gcc/tree-ssa-reassoc.c
===================================================================
--- gcc.orig/gcc/tree-ssa-reassoc.c
+++ gcc/gcc/tree-ssa-reassoc.c
@@ -3110,16 +3110,18 @@  linearize_expr_tree (VEC(operand_entry_t
   add_to_ops_vec (ops, binrhs);
 }

-/* Split up a binary tree-chain of code CODE - starting at STMT - into three
+/* Split up a binary tree-chain of code CODE - starting at STMT - into four
    different kinds:
    - vector VCST stores constant values.
    - vector VNOT stores bitwise-not expressions.
+   - vector VCMP_ZERO stores comparisons to zero.
    - vector VEXRR stores the remaining rest. */

 static void
 walk_bitwise_stmt_elems (gimple stmt, enum tree_code code,
 			 VEC(tree, heap) **vcst,
 			 VEC(tree, heap) **vnot,
+			 VEC(tree, heap) **vcmp_zero,
 			 VEC(tree, heap) **vexpr)
 {
   gimple s;
@@ -3134,9 +3136,19 @@  walk_bitwise_stmt_elems (gimple stmt, en
 	   || !has_single_use (l))
     VEC_safe_push (tree, heap, *vexpr, l);
   else if (gimple_assign_rhs_code (s) == code)
-    walk_bitwise_stmt_elems (s, code, vcst, vnot, vexpr);
+    walk_bitwise_stmt_elems (s, code, vcst, vnot, vcmp_zero, vexpr);
   else if (gimple_assign_rhs_code (s) == BIT_NOT_EXPR)
     VEC_safe_push (tree, heap, *vnot, l);
+  /* (A == 0) & (B == 0) -> (A | B) == 0
+     (A != 0) | (B != 0) -> (A | B) != 0.  */
+  else if (((code == BIT_AND_EXPR
+  	     && gimple_assign_rhs_code (s) == EQ_EXPR)
+  	    || (code == BIT_IOR_EXPR
+  	        && gimple_assign_rhs_code (s) == NE_EXPR))
+  	   && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (s)))
+  	   && TREE_CODE (gimple_assign_rhs2 (s)) == INTEGER_CST
+  	   && integer_zerop (gimple_assign_rhs2 (s)))
+    VEC_safe_push (tree, heap, *vcmp_zero, l);
   else
     VEC_safe_push (tree, heap, *vexpr, l);

@@ -3153,9 +3165,19 @@  walk_bitwise_stmt_elems (gimple stmt, en
       || !has_single_use (l))
     VEC_safe_push (tree, heap, *vexpr, l);
   else if (gimple_assign_rhs_code (s) == code)
-    walk_bitwise_stmt_elems (s, code, vcst, vnot, vexpr);
+    walk_bitwise_stmt_elems (s, code, vcst, vnot, vcmp_zero, vexpr);
   else if (gimple_assign_rhs_code (s) == BIT_NOT_EXPR)
     VEC_safe_push (tree, heap, *vnot, l);
+  /* (A == 0) & (B == 0) -> (A | B) == 0
+     (A != 0) | (B != 0) -> (A | B) != 0.  */
+  else if (((code == BIT_AND_EXPR
+  	     && gimple_assign_rhs_code (s) == EQ_EXPR)
+  	    || (code == BIT_IOR_EXPR
+  	        && gimple_assign_rhs_code (s) == NE_EXPR))
+  	   && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (s)))
+  	   && TREE_CODE (gimple_assign_rhs2 (s)) == INTEGER_CST
+  	   && integer_zerop (gimple_assign_rhs2 (s)))
+    VEC_safe_push (tree, heap, *vcmp_zero, l);
   else
     VEC_safe_push (tree, heap, *vexpr, l);
 }
@@ -3289,6 +3311,209 @@  operate_bitwise_xor_stmt (gimple_stmt_it
   return true;
 }

+/* This function merges the compare-to-zero elements in VCMP:
+   - (X != 0) | (Y != 0) -> (X | Y) != 0
+   - (X == 0) & (Y == 0) -> (X | Y) == 0.
+   Additionally it sorts and merges for the new generated left-hand
+   bitwise-or tree-chain the bitwise-not expressions.
+
+   CODE specifies the final compare expression code. It can be either
+   EQ_EXPR, or NE_EXPR.
+
+   If PGSI is not NULL, then the final compare instruction
+   gets assigned to the statement it points to.  Otherwise a new
+   temporary is created for the comparison and stored in the VEXPR vector.
+
+   Special note:  We first try to merge elements of compatible
+   types, before doing the final merge by casting up to the widest type.
+   For the bitwise-and case, we need to make sure sign-expansion is
+   always done for type-expansion.  */
+
+static void
+merge_bitwise_compares (VEC(tree, heap) **vcmp, VEC(tree, heap) **vexpr,
+			enum tree_code code, gimple_stmt_iterator *pgsi)
+{
+  unsigned int i;
+  unsigned int len = VEC_length (tree, *vcmp);
+  tree l, r, x, cmp_type = NULL_TREE;
+  VEC(tree, heap) *vtmp = NULL;
+  VEC(tree, heap) *vtmp_not = NULL;
+  enum tree_code cmp_code;
+
+  /* If we have just one compare-element, then simply
+     move it to VEXPR vector and return.  */
+  if (len <= 1)
+    {
+      if (VEC_iterate (tree, (*vcmp), 0, (x)))
+	VEC_safe_push (tree, heap, *vexpr, x);
+
+      VEC_free (tree, heap, *vcmp);
+      *vcmp = NULL;
+      return;
+    }
+
+  /* First merge all elements with compatible type, and move
+     intermediate result into VTMP vector.  */
+  while (VEC_length (tree, *vcmp) > 0)
+    {
+      cmp_type = NULL_TREE;
+      l = NULL_TREE;
+      for (i = 0; VEC_iterate (tree, (*vcmp), (i), (x));)
+	{
+	  gimple s = SSA_NAME_DEF_STMT (x);
+	  r = gimple_assign_rhs1 (s);
+	  if (TREE_CODE (r) != SSA_NAME
+	      || (s = SSA_NAME_DEF_STMT (r)) == NULL
+	      || !is_gimple_assign (s)
+	      || gimple_assign_rhs_code (s) != BIT_NOT_EXPR)
+	    {
+	      i++;
+	      continue;
+	    }
+	  r = gimple_assign_rhs1 (s);
+	  if (!l)
+	    {
+	      l = r;
+	      cmp_type = TREE_TYPE (x);
+	      VEC_ordered_remove (tree, *vcmp, i);
+	    }
+	  else if (types_compatible_p (TREE_TYPE (r), TREE_TYPE (l)))
+	    {
+	      l = make_new_tmp_statement (TREE_TYPE (l),
+					  BIT_AND_EXPR, l, r, s);
+	      VEC_ordered_remove (tree, *vcmp, i);
+	    }
+	  else
+	    ++i;
+	}
+      if (l != NULL_TREE)
+	VEC_safe_push (tree, heap, vtmp_not, l);
+      l = NULL_TREE;
+      for (i = 0; VEC_iterate (tree, (*vcmp), (i), (x));)
+	{
+	  gimple s2;
+	  gimple s = SSA_NAME_DEF_STMT (x);
+	  r = gimple_assign_rhs1 (s);
+	  if (TREE_CODE (r) == SSA_NAME
+	      && (s2 = SSA_NAME_DEF_STMT (r)) != NULL
+	      && is_gimple_assign (s2)
+	      && gimple_assign_rhs_code (s2) == BIT_NOT_EXPR)
+	    {
+	      i++;
+	      continue;
+	    }
+	  if (!l)
+	    {
+	      l = r;
+	      cmp_type = TREE_TYPE (x);
+	      VEC_ordered_remove (tree, *vcmp, i);
+	    }
+	  else if (types_compatible_p (TREE_TYPE (r), TREE_TYPE (l)))
+	    {
+	      l = make_new_tmp_statement (TREE_TYPE (l),
+					  BIT_IOR_EXPR, l, r, s);
+	      VEC_ordered_remove (tree, *vcmp, i);
+	    }
+	  else
+	    ++i;
+	}
+      if (l != NULL_TREE)
+        VEC_safe_push (tree, heap, vtmp, l);
+    }
+  VEC_free (tree, heap, *vcmp);
+  *vcmp = NULL;
+
+  /* Combine elements in vector vtmp_not and put them to vtmp.  */
+  l = NULL_TREE;
+  r = NULL_TREE;
+
+  FOR_EACH_VEC_ELT (tree, vtmp_not, i, x)
+    {
+      tree type;
+      type = TREE_TYPE (x);
+
+      if (TYPE_UNSIGNED (type) && VEC_length (tree, vtmp_not) > 1)
+        {
+	  type =  signed_type_for (type);
+	  x = make_new_tmp_statement (type, NOP_EXPR, x, NULL_TREE,
+				      SSA_NAME_DEF_STMT (x));
+	}
+      if (!l)
+        {
+	  l = x;
+	  r = type;
+	}
+      else
+        {
+	  if (TYPE_PRECISION (r) < TYPE_PRECISION (type))
+	    {
+	      l = make_new_tmp_statement (type, NOP_EXPR, l, NULL_TREE,
+					  SSA_NAME_DEF_STMT (x));
+	      r = type;
+	    }
+	  else if (TYPE_PRECISION (r) > TYPE_PRECISION (type)
+	           || !types_compatible_p (r, type))
+	    x = make_new_tmp_statement (r, NOP_EXPR, x, NULL_TREE,
+					SSA_NAME_DEF_STMT (x));
+	  l = make_new_tmp_statement (r, BIT_AND_EXPR, l, x, SSA_NAME_DEF_STMT (x));
+	}
+    }
+  if (l != NULL_TREE)
+    {
+      l = make_new_tmp_statement (TREE_TYPE (l), BIT_NOT_EXPR, l, NULL_TREE,
+      				  SSA_NAME_DEF_STMT (l));
+      VEC_safe_push (tree, heap, vtmp, l);
+    }
+  VEC_free (tree, heap, vtmp_not);
+  vtmp_not = NULL;
+
+  /* Now do final merge of all elements in VTMP vector by casting to
+     wider-type.  */
+  l = NULL_TREE;
+  r = NULL_TREE;
+  FOR_EACH_VEC_ELT (tree, vtmp, i, x)
+    {
+      tree type;
+      type = TREE_TYPE (x);
+
+      if (!l)
+        {
+	  l = x;
+	  r = type;
+	}
+      else
+        {
+	  if (TYPE_PRECISION (r) < TYPE_PRECISION (type))
+	    {
+	      l = make_new_tmp_statement (type, NOP_EXPR, l, NULL_TREE,
+					  SSA_NAME_DEF_STMT (x));
+	      r = type;
+	    }
+	  else if (TYPE_PRECISION (r) > TYPE_PRECISION (type)
+	           || !types_compatible_p (r, type))
+	    x = make_new_tmp_statement (r, NOP_EXPR, x, NULL_TREE,
+					SSA_NAME_DEF_STMT (x));
+	  l = make_new_tmp_statement (r, BIT_IOR_EXPR, l, x, SSA_NAME_DEF_STMT (x));
+	}
+    }
+
+  /* (X == 0) & (Y == 0) -> (X | Y) == 0
+     (X != 0) | (Y != 0) -> (X | Y) != 0
+   */
+  cmp_code = (code == BIT_AND_EXPR ? EQ_EXPR : NE_EXPR);
+
+  if (!pgsi)
+    {
+      l = make_new_tmp_statement (cmp_type,
+				  cmp_code, l,
+				  build_zero_cst (TREE_TYPE (l)),
+				  SSA_NAME_DEF_STMT (l));
+      VEC_safe_push (tree, heap, *vexpr, l);
+    }
+  else
+    gimple_assign_set_rhs_with_ops (pgsi, cmp_code, l,
+				    build_zero_cst (TREE_TYPE (l)));
+  VEC_free (tree, heap, vtmp);
+}
+
 /* Sink bitwise-not expression out of bitwise-binary sequence for STMT.  The
    tree-code of the statement-sequence CODE.
    Following patterns are used for transformations:
@@ -3296,8 +3521,11 @@  operate_bitwise_xor_stmt (gimple_stmt_it
      ~X or ~Y -> ~(X and Y)
      ~X xor ~Y -> X xor Y
      ~X xor Y -> ~(X xor Y)
-     ~X xor CST -> X xor CST' (with CST' = ~CST). */
-
+     ~X xor CST -> X xor CST' (with CST' = ~CST).
+     (X == 0) and (Y == 0) -> (X | Y) == 0, if X and Y have
+     integral types.
+     (X != 0) or (Y != 0) -> (X | Y) != 0, if X and Y have
+     integral types. */
 static bool
 operate_bitwise_stmt (gimple_stmt_iterator *pgsi, gimple stmt, enum tree_code code)
 {
@@ -3306,17 +3534,22 @@  operate_bitwise_stmt (gimple_stmt_iterat
   tree tnot = NULL_TREE, torg = NULL_TREE;
   VEC(tree, heap) *vcst = NULL;
   VEC(tree, heap) *vnot = NULL;
+  VEC(tree, heap) *vcmp_zero = NULL;
   VEC(tree, heap) *vexpr = NULL;
   enum tree_code icode;
   bool do_repropagate = false;

   gcc_assert (gimple_assign_rhs_code (stmt) == code);

-  walk_bitwise_stmt_elems (stmt, code, &vcst, &vnot, &vexpr);
+  walk_bitwise_stmt_elems (stmt, code, &vcst, &vnot,
+  			   &vcmp_zero, &vexpr);

   /* There are more then one constant values,  */
   if (VEC_length (tree, vcst) > 1)
     do_repropagate = true;
+  /* There is more than one comparison-to-zero,  */
+  else if (VEC_length (tree, vcmp_zero) > 1)
+    do_repropagate = true;
   /* There are more then one not-expressions,  */
   else if (VEC_length (tree, vnot) > 1)
     do_repropagate = true;
@@ -3324,6 +3557,7 @@  operate_bitwise_stmt (gimple_stmt_iterat
      If we have (~X | CST), we convert to ~(X & CST') with CST'=~CST.  */
   else if (VEC_length (tree, vnot) == 1
            && VEC_length (tree, vexpr) == 0
+           && VEC_length (tree, vcmp_zero) == 0
            && VEC_length (tree, vcst) >= 1)
      do_repropagate = true;
   /* For xor-expressions we do following conversion:
@@ -3337,6 +3571,7 @@  operate_bitwise_stmt (gimple_stmt_iterat
     {
       VEC_free (tree, heap, vcst);
       VEC_free (tree, heap, vnot);
+      VEC_free (tree, heap, vcmp_zero);
       VEC_free (tree, heap, vexpr);
       return false;
     }
@@ -3352,6 +3587,16 @@  operate_bitwise_stmt (gimple_stmt_iterat
   VEC_free (tree, heap, vcst);
   vcst = NULL;

+  /* If we have only vcmp_zero elements, directly assign the result
+     of the combine to the final stmt.  */
+  if (!tcst && !vexpr && !vnot)
+    {
+      merge_bitwise_compares (&vcmp_zero, &vexpr, code, pgsi);
+      return true;
+    }
+
+  merge_bitwise_compares (&vcmp_zero, &vexpr, code, NULL);
+
   /* If we deal only with constants, assign
      calculated constant.  */
   if (tcst && !vnot && !vexpr)
@@ -3421,7 +3666,10 @@  operate_bitwise_stmt (gimple_stmt_iterat
 }

 /* Repropagate bitwise-operations back to de-normalized form.
-   ~X op ~Y -> ~(X op' Y). */
+   ~X op ~Y -> ~(X op' Y)
+   (X != 0) | (Y != 0) -> (X | Y) != 0,
+   (X == 0) & (Y == 0) -> (X | Y) == 0, and
+   ~X ==/!= 0 -> X ==/!= ~0.  */

 static void
 repropagate_bitwise (basic_block bb)
Index: gcc/gcc/testsuite/gcc.dg/tree-ssa/reassoc-repro_cmpior-1.c
===================================================================
--- /dev/null
+++ gcc/gcc/testsuite/gcc.dg/tree-ssa/reassoc-repro_cmpior-1.c
@@ -0,0 +1,21 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized -ffast-math" } */
+
+int foo (int a, int b, int c, int d)
+{
+  int r = (a == 0) & (b == 0);
+  int q = (c == 0) & (d == 0);
+  return r & q;
+}
+
+int bar (int a, int b, int c, int d)
+{
+  int r = (a != 0) | (b != 0);
+  int q = (c != 0) | (d != 0);
+  return r | q;
+}
+
+/* { dg-final { scan-tree-dump-times " == 0" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times " != 0" 1 "optimized"} } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
+
Index: gcc/gcc/testsuite/gcc.dg/tree-ssa/reassoc-repro_cmpior-2.c
===================================================================
--- /dev/null
+++ gcc/gcc/testsuite/gcc.dg/tree-ssa/reassoc-repro_cmpior-2.c
@@ -0,0 +1,21 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized -ffast-math" } */
+
+int foo (int a, char b, short c, long d)
+{
+  int r = (a == 0) & (b == 0);
+  int q = (c == 0) & (d == 0);
+  return r & q;
+}
+
+int bar (int a, char b, short c, long d)
+{
+  int r = (a != 0) | (b != 0);
+  int q = (c != 0) | (d != 0);
+  return r | q;
+}
+
+/* { dg-final { scan-tree-dump-times " == 0" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times " != 0" 1 "optimized"} } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
+
Index: gcc/gcc/testsuite/gcc.dg/tree-ssa/reassoc-repro_cmpior-3.c
===================================================================
--- /dev/null
+++ gcc/gcc/testsuite/gcc.dg/tree-ssa/reassoc-repro_cmpior-3.c
@@ -0,0 +1,27 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized -ffast-math" } */
+
+int foo (int a, unsigned char b, unsigned short c, unsigned long d)
+{
+  unsigned char b1 = 0;
+  unsigned short c1 = 0;
+  unsigned long d1 = 0;
+  int r = (a == 0) & (b == b1);
+  int q = (c == c1) & (d == d1);
+  return r & q;
+}
+
+int bar (int a, unsigned char b, unsigned short c, unsigned long d)
+{
+  unsigned char b1 = 0;
+  unsigned short c1 = 0;
+  unsigned long d1 = 0;
+  int r = (a != 0) | (b != b1);
+  int q = (c != c1) | (d != d1);
+  return r | q;
+}
+
+/* { dg-final { scan-tree-dump-times " == 0" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times " != 0" 1 "optimized"} } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
+