
Compute nonzero_bits from __builtin_unreachable assertions

Message ID 20131025221933.GN30970@tucnak.zalov.cz
State New

Commit Message

Jakub Jelinek Oct. 25, 2013, 10:19 p.m. UTC
Hi!

And here is a patch that allows vectorization without peeling for alignment
and without a scalar loop for the bound even for fn2, fn3 and fn4 in the
following testcase.  Though, as with the range notes derived from
__builtin_unreachable (), it is quite fragile, because it only works if
there are no immediate uses of the tested SSA_NAME before the assertion.
Perhaps a more reliable way would be to convert those assertions into
__builtin_assume_aligned, but that has the disadvantage that its first
argument is a pointer and it returns a pointer, so we'd need to cast
integers to pointers and back, or add an ASSUME_ALIGNED internal function.

Bootstrapped/regtested on x86_64-linux and i686-linux.

int a[1024];

void
fn1 (int x, int y)
{
  int i;
  x &= -32;
  y &= -32;
  for (i = x + 32; i < y; i++)
    a[i]++;
}

void
fn2 (int x, int y)
{
  int i;
  if (x & 31)
    __builtin_unreachable ();
  if (y & 31)
    __builtin_unreachable ();
  for (i = x + 32; i < x + y; i++)
    a[i]++;
}

void
fn3 (int x, int y)
{
  int i;
  if (x % 32)
    __builtin_unreachable ();
  if (y % 32)
    __builtin_unreachable ();
  for (i = x + 32; i < x + y; i++)
    a[i]++;
}

void
fn4 (int x, int y)
{
  int i;
  if ((x % 32) != 0)
    __builtin_unreachable ();
  if ((y % 32) != 0)
    __builtin_unreachable ();
  for (i = x + 32; i < x + y; i++)
    a[i]++;
}
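
To make the fragility concrete, here is a hypothetical variant (fn5 is not
part of the patch): because x has an immediate use before the assertion,
the nonzero-bits information presumably would not be recorded and the
optimization would not fire:

void
fn5 (int x, int y)
{
  int i;
  a[0] = x;	/* use of x before the assertion */
  if (x & 31)
    __builtin_unreachable ();
  for (i = x + 32; i < x + y; i++)
    a[i]++;
}

And a rough sketch of the __builtin_assume_aligned alternative mentioned
above (fn2_alt is hypothetical, not part of the patch), showing the
integer-to-pointer round trip it would force:

void
fn2_alt (int x, int y)
{
  int i;
  x = (int) (__UINTPTR_TYPE__)
      __builtin_assume_aligned ((void *) (__UINTPTR_TYPE__) x, 32);
  y = (int) (__UINTPTR_TYPE__)
      __builtin_assume_aligned ((void *) (__UINTPTR_TYPE__) y, 32);
  for (i = x + 32; i < x + y; i++)
    a[i]++;
}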

2013-10-25  Jakub Jelinek  <jakub@redhat.com>

	* tree-vrp.c (maybe_set_nonzero_bits): New function.
	(remove_range_assertions): Call it.


	Jakub

Comments

Richard Biener Oct. 29, 2013, 2:54 p.m. UTC | #1
On Sat, 26 Oct 2013, Jakub Jelinek wrote:

> Hi!
> 
> And here is a patch that allows vectorization without peeling for alignment
> and without a scalar loop for the bound even for fn2, fn3 and fn4 in the
> following testcase.  Though, as with the range notes derived from
> __builtin_unreachable (), it is quite fragile, because it only works if
> there are no immediate uses of the tested SSA_NAME before the assertion.
> Perhaps a more reliable way would be to convert those assertions into
> __builtin_assume_aligned, but that has the disadvantage that its first
> argument is a pointer and it returns a pointer, so we'd need to cast
> integers to pointers and back, or add an ASSUME_ALIGNED internal function.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux.

Ok.

Thanks,
Richard.


Patch

--- gcc/tree-vrp.c.jj	2013-10-24 14:32:29.000000000 +0200
+++ gcc/tree-vrp.c	2013-10-25 21:21:35.183092937 +0200
@@ -6459,6 +6459,60 @@  check_all_array_refs (void)
     }
 }
 
+/* Handle
+   _4 = x_3 & 31;
+   if (_4 != 0)
+     goto <bb 6>;
+   else
+     goto <bb 7>;
+   <bb 6>:
+   __builtin_unreachable ();
+   <bb 7>:
+   x_5 = ASSERT_EXPR <x_3, ...>;
+   If x_3 has no other immediate uses (checked by the caller) and var
+   is the x_3 from the ASSERT_EXPR, we can clear the low 5 bits from
+   var's non-zero bits mask.  */
+
+static void
+maybe_set_nonzero_bits (basic_block bb, tree var)
+{
+  edge e = single_pred_edge (bb);
+  basic_block cond_bb = e->src;
+  gimple stmt = last_stmt (cond_bb);
+  tree cst;
+
+  if (stmt == NULL
+      || gimple_code (stmt) != GIMPLE_COND
+      || gimple_cond_code (stmt) != ((e->flags & EDGE_TRUE_VALUE)
+				     ? EQ_EXPR : NE_EXPR)
+      || TREE_CODE (gimple_cond_lhs (stmt)) != SSA_NAME
+      || !integer_zerop (gimple_cond_rhs (stmt)))
+    return;
+
+  stmt = SSA_NAME_DEF_STMT (gimple_cond_lhs (stmt));
+  if (!is_gimple_assign (stmt)
+      || gimple_assign_rhs_code (stmt) != BIT_AND_EXPR
+      || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST)
+    return;
+  if (gimple_assign_rhs1 (stmt) != var)
+    {
+      gimple stmt2;
+
+      if (TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME)
+	return;
+      stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
+      if (!gimple_assign_cast_p (stmt2)
+	  || gimple_assign_rhs1 (stmt2) != var
+	  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt2))
+	  || (TYPE_PRECISION (TREE_TYPE (gimple_assign_rhs1 (stmt)))
+			      != TYPE_PRECISION (TREE_TYPE (var))))
+	return;
+    }
+  cst = gimple_assign_rhs2 (stmt);
+  set_nonzero_bits (var, (get_nonzero_bits (var)
+			  & ~tree_to_double_int (cst)));
+}
+
 /* Convert range assertion expressions into the implied copies and
    copy propagate away the copies.  Doing the trivial copy propagation
    here avoids the need to run the full copy propagation pass after
@@ -6576,8 +6630,11 @@  remove_range_assertions (void)
 			    }
 			}
 		    if (ok)
-		      set_range_info (var, SSA_NAME_RANGE_INFO (lhs)->min,
-				      SSA_NAME_RANGE_INFO (lhs)->max);
+		      {
+			set_range_info (var, SSA_NAME_RANGE_INFO (lhs)->min,
+					SSA_NAME_RANGE_INFO (lhs)->max);
+			maybe_set_nonzero_bits (bb, var);
+		      }
 		  }
 	      }
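
For illustration, a worked example (assuming 32-bit int; not part of the
patch): in fn2 above the tested constant cst is 31, so for x_3 the final
call in maybe_set_nonzero_bits computes

  get_nonzero_bits (x_3)   = 0xffffffff   (nothing known about x yet)
  ~tree_to_double_int (31) = 0xffffffe0
  new nonzero-bits mask    = 0xffffffe0   (bits 0-4 known to be zero)

i.e. x is recorded as a multiple of 32, so with 4-byte ints the access
a[x + 32] starts at a multiple of 128 bytes from &a[0], which is what lets
the vectorizer skip the alignment peeling and the scalar loop for the bound.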