Patchwork Teach VRP about __builtin_{ffs,parity,popcount,clz,ctz,clrsb}{,l,ll,imax} (PR target/29776)

login
register
mail settings
Submitter Jakub Jelinek
Date July 5, 2013, 3:01 p.m.
Message ID <20130705150126.GW2336@tucnak.redhat.com>
Download mbox | patch
Permalink /patch/257185/
State New
Headers show

Comments

Jakub Jelinek - July 5, 2013, 3:01 p.m.
Hi!

Attached are two versions of a patch to teach VRP about the int bitop
builtins.  Both patches are identical for all builtins but
__builtin_c[lt]z*, which are the only two from these that are documented
to have undefined behavior on some argument (0).

The first version is strict, it assumes __builtin_c[lt]z* (0) doesn't happen
in valid programs, while the second one attempts to be less strict to avoid
breaking stuff too much.

The reason for writing the second patch is that longlong.h on various
targets has stuff like:
#ifdef __alpha_cix__
#define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clzl (X))
#define count_trailing_zeros(COUNT,X)   ((COUNT) = __builtin_ctzl (X))
#define COUNT_LEADING_ZEROS_0 64
#else
and documents that if COUNT_LEADING_ZEROS_0 is defined, then
count_leading_zeros (cnt, 0) should be well defined and set
cnt to COUNT_LEADING_ZEROS_0.  While neither gcc nor glibc use
COUNT_LEADING_ZEROS_0 macro, I'm a little bit afraid some code in the wild
might do, and it might even have its own copy of longlong.h, so even if
we've removed those COUNT_LEADING_ZEROS_0 macros for targets that
use the builtins, something could stay broken.  So, what the patch does
is if an optab exists for the mode of the builtin's argument and
C?Z_DEFINED_VALUE_AT_ZERO is defined, then it will add that value to the
range unless VR of argument is non-zero (well, it handles only a few
interesting commonly used values, for CLZ only precision of the mode
(seems right now when CLZ_DEFINED_VALUE_AT_ZERO is non-zero, it sets
it always to bitsize of the mode, and even widening or double word
narrowing expansion should maintain this property), for CTZ -1 and
bitsize).  If there isn't an optab for it, for CLZ it still assumes
it might be bitsize, for CTZ it just assumes it is undefined behavior
otherwise, because if I understand the code right, for CTZ we really could
return various values for 0 without hw support for the mode, e.g. when
CTZ is implemented using CLZ, it might return something, if we use wider
mode hw CTZ and it would return bitsize, that would be bitsize of the wider
mode etc.  I bet longlong.h only uses __builtin_c?z builtins for modes
actually implemented in hw anyway (otherwise it couldn't be used safely in
libgcc implementation of those libcalls).

Both patches have been bootstrapped/regtested on x86_64-linux and
i686-linux, which one do you prefer?

	Jakub
2013-07-05  Jakub Jelinek  <jakub@redhat.com>

	PR target/29776
	* fold-const.c (tree_call_nonnegative_warnv_p): Return true
	for BUILT_IN_C{LZ,TZ,LRSB}*.
	* tree.h (CASE_INT_FN): Add FN##IMAX case.
	* tree-vrp.c (extract_range_basic): Handle
	BUILT_IN_{FFS,PARITY,POPCOUNT,C{LZ,TZ,LRSB}}*.  For
	BUILT_IN_CONSTANT_P if argument isn't (D) of PARM_DECL,
	fall thru to code calling set_value*.
	* builtins.c (expand_builtin): Remove *IMAX cases.
	(fold_builtin_bitop): For BUILT_IN_CLRSB* return NULL_TREE
	if width is bigger than 2*HWI.

	* libgcc2.c (__floattisf): Avoid undefined signed overflow.

	* gcc.dg/tree-ssa/vrp89.c: New test.
2013-07-05  Jakub Jelinek  <jakub@redhat.com>

	PR target/29776
	* fold-const.c (tree_call_nonnegative_warnv_p): Return true
	for BUILT_IN_C{LZ,LRSB}*.
	* tree.h (CASE_INT_FN): Add FN##IMAX case.
	* tree-vrp.c (extract_range_basic): Handle
	BUILT_IN_{FFS,PARITY,POPCOUNT,C{LZ,TZ,LRSB}}*.  For
	BUILT_IN_CONSTANT_P if argument isn't (D) of PARM_DECL,
	fall thru to code calling set_value*.
	* builtins.c (expand_builtin): Remove *IMAX cases.
	(fold_builtin_bitop): For BUILT_IN_CLRSB* return NULL_TREE
	if width is bigger than 2*HWI.

	* libgcc2.c (__floattisf): Avoid undefined signed overflow.

	* gcc.dg/tree-ssa/vrp89.c: New test.

--- gcc/fold-const.c.jj	2013-06-19 19:28:32.000000000 +0200
+++ gcc/fold-const.c	2013-07-04 12:13:28.153901855 +0200
@@ -15606,6 +15606,8 @@ tree_call_nonnegative_warnv_p (tree type
 	CASE_INT_FN (BUILT_IN_FFS):
 	CASE_INT_FN (BUILT_IN_PARITY):
 	CASE_INT_FN (BUILT_IN_POPCOUNT):
+	CASE_INT_FN (BUILT_IN_CLZ):
+	CASE_INT_FN (BUILT_IN_CLRSB):
       case BUILT_IN_BSWAP32:
       case BUILT_IN_BSWAP64:
 	/* Always true.  */
--- gcc/tree.h.jj	2013-06-23 20:43:31.000000000 +0200
+++ gcc/tree.h	2013-07-04 11:42:42.403530159 +0200
@@ -322,7 +322,7 @@ extern const char * built_in_names[(int)
 
 #define CASE_FLT_FN(FN) case FN: case FN##F: case FN##L
 #define CASE_FLT_FN_REENT(FN) case FN##_R: case FN##F_R: case FN##L_R
-#define CASE_INT_FN(FN) case FN: case FN##L: case FN##LL
+#define CASE_INT_FN(FN) case FN: case FN##L: case FN##LL: case FN##IMAX
 
 /* In an OMP_CLAUSE node.  */
 
--- gcc/tree-vrp.c.jj	2013-05-24 20:07:40.000000000 +0200
+++ gcc/tree-vrp.c	2013-07-05 11:17:15.354100260 +0200
@@ -3565,20 +3565,184 @@ extract_range_basic (value_range_t *vr,
   bool sop = false;
   tree type = gimple_expr_type (stmt);
 
-  /* If the call is __builtin_constant_p and the argument is a
-     function parameter resolve it to false.  This avoids bogus
-     array bound warnings.
-     ???  We could do this as early as inlining is finished.  */
-  if (gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P))
+  if (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
     {
-      tree arg = gimple_call_arg (stmt, 0);
-      if (TREE_CODE (arg) == SSA_NAME
-	  && SSA_NAME_IS_DEFAULT_DEF (arg)
-	  && TREE_CODE (SSA_NAME_VAR (arg)) == PARM_DECL)
-	set_value_range_to_null (vr, type);
+      tree fndecl = gimple_call_fndecl (stmt), arg;
+      int mini, maxi, zerov = 0, prec;
+
+      switch (DECL_FUNCTION_CODE (fndecl))
+	{
+	case BUILT_IN_CONSTANT_P:
+	  /* If the call is __builtin_constant_p and the argument is a
+	     function parameter resolve it to false.  This avoids bogus
+	     array bound warnings.
+	     ???  We could do this as early as inlining is finished.  */
+	  arg = gimple_call_arg (stmt, 0);
+	  if (TREE_CODE (arg) == SSA_NAME
+	      && SSA_NAME_IS_DEFAULT_DEF (arg)
+	      && TREE_CODE (SSA_NAME_VAR (arg)) == PARM_DECL)
+	    {
+	      set_value_range_to_null (vr, type);
+	      return;
+	    }
+	  break;
+	  /* Both __builtin_ffs* and __builtin_popcount return
+	     [0, prec].  */
+	CASE_INT_FN (BUILT_IN_FFS):
+	CASE_INT_FN (BUILT_IN_POPCOUNT):
+	  arg = gimple_call_arg (stmt, 0);
+	  prec = TYPE_PRECISION (TREE_TYPE (arg));
+	  mini = 0;
+	  maxi = prec;
+	  if (TREE_CODE (arg) == SSA_NAME)
+	    {
+	      value_range_t *vr0 = get_value_range (arg);
+	      /* If arg is non-zero, then ffs or popcount
+		 are non-zero.  */
+	      if (((vr0->type == VR_RANGE
+		    && integer_nonzerop (vr0->min))
+		   || (vr0->type == VR_ANTI_RANGE
+		       && integer_zerop (vr0->min)))
+		  && !TREE_OVERFLOW (vr0->min))
+		mini = 1;
+	      /* If some high bits are known to be zero,
+		 we can decrease the maximum.  */
+	      if (vr0->type == VR_RANGE
+		  && TREE_CODE (vr0->max) == INTEGER_CST
+		  && !TREE_OVERFLOW (vr0->max))
+		maxi = tree_floor_log2 (vr0->max) + 1;
+	    }
+	  goto bitop_builtin;
+	  /* __builtin_parity* returns [0, 1].  */
+	CASE_INT_FN (BUILT_IN_PARITY):
+	  mini = 0;
+	  maxi = 1;
+	  goto bitop_builtin;
+	  /* __builtin_c[lt]z* return [0, prec-1], except for
+	     when the argument is 0, but that is undefined behavior.
+	     On many targets where the CLZ RTL or optab value is defined
+	     for 0 the value is prec, so include that in the range
+	     by default.  */
+	CASE_INT_FN (BUILT_IN_CLZ):
+	  arg = gimple_call_arg (stmt, 0);
+	  prec = TYPE_PRECISION (TREE_TYPE (arg));
+	  mini = 0;
+	  maxi = prec;
+	  if (optab_handler (ctz_optab, TYPE_MODE (TREE_TYPE (arg)))
+	      != CODE_FOR_nothing
+	      && CLZ_DEFINED_VALUE_AT_ZERO (TYPE_MODE (TREE_TYPE (arg)),
+					    zerov)
+	      /* Handle only the single common value.  */
+	      && zerov != prec)
+	    /* Magic value to give up, unless vr0 proves
+	       arg is non-zero.  */
+	    mini = -2;
+	  if (TREE_CODE (arg) == SSA_NAME)
+	    {
+	      value_range_t *vr0 = get_value_range (arg);
+	      /* From clz of VR_RANGE minimum we can compute
+		 result maximum.  */
+	      if (vr0->type == VR_RANGE
+		  && TREE_CODE (vr0->min) == INTEGER_CST
+		  && !TREE_OVERFLOW (vr0->min))
+		{
+		  maxi = prec - 1 - tree_floor_log2 (vr0->min);
+		  if (maxi != prec)
+		    mini = 0;
+		}
+	      else if (vr0->type == VR_ANTI_RANGE
+		       && integer_zerop (vr0->min)
+		       && !TREE_OVERFLOW (vr0->min))
+		{
+		  maxi = prec - 1;
+		  mini = 0;
+		}
+	      if (mini == -2)
+		break;
+	      /* From clz of VR_RANGE maximum we can compute
+		 result minimum.  */
+	      if (vr0->type == VR_RANGE
+		  && TREE_CODE (vr0->max) == INTEGER_CST
+		  && !TREE_OVERFLOW (vr0->max))
+		{
+		  mini = prec - 1 - tree_floor_log2 (vr0->max);
+		  if (mini == prec)
+		    break;
+		}
+	    }
+	  if (mini == -2)
+	    break;
+	  goto bitop_builtin;
+	  /* __builtin_ctz* return [0, prec-1], except for
+	     when the argument is 0, but that is undefined behavior.
+	     If there is a ctz optab for this mode and
+	     CTZ_DEFINED_VALUE_AT_ZERO, include that in the range,
+	     otherwise just assume 0 won't be seen.  */
+	CASE_INT_FN (BUILT_IN_CTZ):
+	  arg = gimple_call_arg (stmt, 0);
+	  prec = TYPE_PRECISION (TREE_TYPE (arg));
+	  mini = 0;
+	  maxi = prec - 1;
+	  if (optab_handler (ctz_optab, TYPE_MODE (TREE_TYPE (arg)))
+	      != CODE_FOR_nothing
+	      && CTZ_DEFINED_VALUE_AT_ZERO (TYPE_MODE (TREE_TYPE (arg)),
+					    zerov))
+	    {
+	      /* Handle only the two common values.  */
+	      if (zerov == -1)
+		mini = -1;
+	      else if (zerov == prec)
+		maxi = prec;
+	      else
+		/* Magic value to give up, unless vr0 proves
+		   arg is non-zero.  */
+		mini = -2;
+	    }
+	  if (TREE_CODE (arg) == SSA_NAME)
+	    {
+	      value_range_t *vr0 = get_value_range (arg);
+	      /* If arg is non-zero, then use [0, prec - 1].  */
+	      if (((vr0->type == VR_RANGE
+		    && integer_nonzerop (vr0->min))
+		   || (vr0->type == VR_ANTI_RANGE
+		       && integer_zerop (vr0->min)))
+		  && !TREE_OVERFLOW (vr0->min))
+		{
+		  mini = 0;
+		  maxi = prec - 1;
+		}
+	      /* If some high bits are known to be zero,
+		 we can decrease the result maximum.  */
+	      if (vr0->type == VR_RANGE
+		  && TREE_CODE (vr0->max) == INTEGER_CST
+		  && !TREE_OVERFLOW (vr0->max))
+		{
+		  maxi = tree_floor_log2 (vr0->max);
+		  /* For vr0 [0, 0] give up.  */
+		  if (maxi == -1)
+		    break;
+		}
+	    }
+	  if (mini == -2)
+	    break;
+	  goto bitop_builtin;
+	  /* __builtin_clrsb* returns [0, prec-1].  */
+	CASE_INT_FN (BUILT_IN_CLRSB):
+	  arg = gimple_call_arg (stmt, 0);
+	  prec = TYPE_PRECISION (TREE_TYPE (arg));
+	  mini = 0;
+	  maxi = prec - 1;
+	  goto bitop_builtin;
+	bitop_builtin:
+	  set_value_range (vr, VR_RANGE, build_int_cst (type, mini),
+			   build_int_cst (type, maxi), NULL);
+	  return;
+	default:
+	  break;
+	}
     }
-  else if (INTEGRAL_TYPE_P (type)
-	   && gimple_stmt_nonnegative_warnv_p (stmt, &sop))
+  if (INTEGRAL_TYPE_P (type)
+      && gimple_stmt_nonnegative_warnv_p (stmt, &sop))
     set_value_range_to_nonnegative (vr, type,
 				    sop || stmt_overflow_infinity (stmt));
   else if (vrp_stmt_computes_nonzero (stmt, &sop)
--- gcc/builtins.c.jj	2013-07-03 22:15:05.000000000 +0200
+++ gcc/builtins.c	2013-07-04 11:43:31.801693566 +0200
@@ -6107,7 +6107,6 @@ expand_builtin (tree exp, rtx target, rt
       break;
 
     CASE_INT_FN (BUILT_IN_FFS):
-    case BUILT_IN_FFSIMAX:
       target = expand_builtin_unop (target_mode, exp, target,
 				    subtarget, ffs_optab);
       if (target)
@@ -6115,7 +6114,6 @@ expand_builtin (tree exp, rtx target, rt
       break;
 
     CASE_INT_FN (BUILT_IN_CLZ):
-    case BUILT_IN_CLZIMAX:
       target = expand_builtin_unop (target_mode, exp, target,
 				    subtarget, clz_optab);
       if (target)
@@ -6123,7 +6121,6 @@ expand_builtin (tree exp, rtx target, rt
       break;
 
     CASE_INT_FN (BUILT_IN_CTZ):
-    case BUILT_IN_CTZIMAX:
       target = expand_builtin_unop (target_mode, exp, target,
 				    subtarget, ctz_optab);
       if (target)
@@ -6131,7 +6128,6 @@ expand_builtin (tree exp, rtx target, rt
       break;
 
     CASE_INT_FN (BUILT_IN_CLRSB):
-    case BUILT_IN_CLRSBIMAX:
       target = expand_builtin_unop (target_mode, exp, target,
 				    subtarget, clrsb_optab);
       if (target)
@@ -6139,7 +6135,6 @@ expand_builtin (tree exp, rtx target, rt
       break;
 
     CASE_INT_FN (BUILT_IN_POPCOUNT):
-    case BUILT_IN_POPCOUNTIMAX:
       target = expand_builtin_unop (target_mode, exp, target,
 				    subtarget, popcount_optab);
       if (target)
@@ -6147,7 +6142,6 @@ expand_builtin (tree exp, rtx target, rt
       break;
 
     CASE_INT_FN (BUILT_IN_PARITY):
-    case BUILT_IN_PARITYIMAX:
       target = expand_builtin_unop (target_mode, exp, target,
 				    subtarget, parity_optab);
       if (target)
@@ -8152,6 +8146,8 @@ fold_builtin_bitop (tree fndecl, tree ar
 	  break;
 
 	CASE_INT_FN (BUILT_IN_CLRSB):
+	  if (width > 2 * HOST_BITS_PER_WIDE_INT)
+	    return NULL_TREE;
 	  if (width > HOST_BITS_PER_WIDE_INT
 	      && (hi & ((unsigned HOST_WIDE_INT) 1
 			<< (width - HOST_BITS_PER_WIDE_INT - 1))) != 0)
--- libgcc/libgcc2.c.jj	2013-07-03 13:01:15.000000000 +0200
+++ libgcc/libgcc2.c	2013-07-05 08:54:42.080525408 +0200
@@ -1571,7 +1571,7 @@ FUNC (DWtype u)
   /* Otherwise, find the power of two.  */
   Wtype hi = u >> W_TYPE_SIZE;
   if (hi < 0)
-    hi = -hi;
+    hi = -(UWtype) hi;
 
   UWtype count, shift;
   count_leading_zeros (count, hi);
--- gcc/testsuite/gcc.dg/tree-ssa/vrp89.c.jj	2013-07-04 12:19:17.271993105 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/vrp89.c	2013-07-05 11:19:34.076815347 +0200
@@ -0,0 +1,57 @@
+/* PR target/29776 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-vrp1" } */
+/* { dg-final { scan-tree-dump-not "link_error" "vrp1"} } */
+/* { dg-final { cleanup-tree-dump "vrp1" } } */
+
+#define A(fn, arg, min, max) \
+  if (__builtin_##fn (arg) < min || __builtin_##fn (arg) > max) \
+    link_error ();
+#define B(fn, min, max) \
+  A (fn, a, min, max) A (fn##l, b, min, max) A (fn##ll, c, min, max)
+#define C(fn, min, sub) \
+  A (fn, a, min, ((int) sizeof (a) * __CHAR_BIT__ - sub)) \
+  A (fn##l, b, min, ((int) sizeof (b) * __CHAR_BIT__ - sub)) \
+  A (fn##ll, c, min, ((int) sizeof (c) * __CHAR_BIT__ - sub))
+#define D(fn, sub1, sub2) \
+  A (fn, a, ((int) sizeof (a) * __CHAR_BIT__ - sub1), \
+     ((int) sizeof (a) * __CHAR_BIT__ - sub2)) \
+  A (fn##l, b, ((int) sizeof (b) * __CHAR_BIT__ - sub1), \
+     ((int) sizeof (b) * __CHAR_BIT__ - sub2)) \
+  A (fn##ll, c, ((int) sizeof (c) * __CHAR_BIT__ - sub1), \
+     ((int) sizeof (c) * __CHAR_BIT__ - sub2))
+
+extern void link_error (void);
+
+unsigned int d;
+unsigned long e;
+unsigned long long f;
+
+void
+foo (unsigned int a, unsigned long b, unsigned long long c)
+{
+  B (parity, 0, 1)
+  C (ffs, 0, 0)
+  C (popcount, 0, 0)
+  C (clz, 0, 0)
+  C (ctz, -1, 0)
+  a &= 63;
+  b &= 63;
+  c &= 63;
+  B (ffs, 0, 6)
+  B (popcount, 0, 6)
+  a += 3; b += 3; c += 3;
+  B (ffs, 1, 7)
+  B (popcount, 1, 7)
+  a = 32U + (d & 1023U);
+  b = 32UL + (e & 1023UL);
+  c = 32ULL + (f & 1023ULL);
+  D (clz, 11, 6)
+  B (ctz, 0, 10)
+}
+
+void
+bar (int a, long b, long long c)
+{
+  C (clrsb, 0, 1)
+}
Richard Guenther - July 6, 2013, 9:25 a.m.
Jakub Jelinek <jakub@redhat.com> wrote:

>Hi!
>
>Attached are two versions of a patch to teach VRP about the int bitop
>builtins.  Both patches are identical for all builtins but
>__builtin_c[lt]z*, which are the only two from these that are
>documented
>to have undefined behavior on some argument (0).
>
>The first version is strict, it assumes __builtin_c[lt]z* (0) doesn't
>happen
>in valid programs, while the second one attempts to be less strict to
>avoid
>breaking stuff too much.
>
>The reason for writing the second patch is that longlong.h on various
>targets has stuff like:
>#ifdef __alpha_cix__
>#define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clzl (X))
>#define count_trailing_zeros(COUNT,X)   ((COUNT) = __builtin_ctzl (X))
>#define COUNT_LEADING_ZEROS_0 64
>#else
>and documents that if COUNT_LEADING_ZEROS_0 is defined, then
>count_leading_zeros (cnt, 0) should be well defined and set
>cnt to COUNT_LEADING_ZEROS_0.  While neither gcc nor glibc use
>COUNT_LEADING_ZEROS_0 macro, I'm a little bit afraid some code in the
>wild
>might do, and it might even have its own copy of longlong.h, so even if
>we've removed those COUNT_LEADING_ZEROS_0 macros for targets that
>use the builtins, something could stay broken.  So, what the patch does
>is if an optab exists for the mode of the builtin's argument and
>C?Z_DEFINED_VALUE_AT_ZERO is defined, then it will add that value to
>the
>range unless VR of argument is non-zero (well, it handles only a few
>interesting commonly used values, for CLZ only precision of the mode
>(seems right now when CLZ_DEFINED_VALUE_AT_ZERO is non-zero, it sets
>it always to bitsize of the mode, and even widening or double word
>narrowing expansion should maintain this property), for CTZ -1 and
>bitsize).  If there isn't an optab for it, for CLZ it still assumes
>it might be bitsize, for CTZ it just assumes it is undefined behavior
>otherwise, because if I understand the code right, for CTZ we really
>could
>return various values for 0 without hw support for the mode, e.g. when
>CTZ is implemented using CLZ, it might return something, if we use
>wider
>mode hw CTZ and it would return bitsize, that would be bitsize of the
>wider
>mode etc.  I bet longlong.h only uses __builtin_c?z builtins for modes
>actually implemented in hw anyway (otherwise it couldn't be used safely
>in
>libgcc implementation of those libcalls).
>
>Both patches have been bootstrapped/regtested on x86_64-linux and
>i686-linux, which one do you prefer?

The less strict variant.

Thanks 
Richard.

>	Jakub
Richard Earnshaw - July 9, 2013, 9:20 a.m.
On 05/07/13 16:01, Jakub Jelinek wrote:
> Hi!
>
> Attached are two versions of a patch to teach VRP about the int bitop
> builtins.  Both patches are identical for all builtins but
> __builtin_c[lt]z*, which are the only two from these that are documented
> to have undefined behavior on some argument (0).
>
> The first version is strict, it assumes __builtin_c[lt]z* (0) doesn't happen
> in valid programs, while the second one attempts to be less strict to avoid
> breaking stuff too much.
>
> The reason for writing the second patch is that longlong.h on various
> targets has stuff like:
> #ifdef __alpha_cix__
> #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clzl (X))
> #define count_trailing_zeros(COUNT,X)   ((COUNT) = __builtin_ctzl (X))
> #define COUNT_LEADING_ZEROS_0 64
> #else
> and documents that if COUNT_LEADING_ZEROS_0 is defined, then
> count_leading_zeros (cnt, 0) should be well defined and set
> cnt to COUNT_LEADING_ZEROS_0.  While neither gcc nor glibc use
> COUNT_LEADING_ZEROS_0 macro, I'm a little bit afraid some code in the wild
> might do, and it might even have its own copy of longlong.h, so even if
> we've removed those COUNT_LEADING_ZEROS_0 macros for targets that
> use the builtins, something could stay broken.  So, what the patch does
> is if an optab exists for the mode of the builtin's argument and
> C?Z_DEFINED_VALUE_AT_ZERO is defined, then it will add that value to the
> range unless VR of argument is non-zero (well, it handles only a few
> interesting commonly used values, for CLZ only precision of the mode
> (seems right now when CLZ_DEFINED_VALUE_AT_ZERO is non-zero, it sets
> it always to bitsize of the mode, and even widening or double word
> narrowing expansion should maintain this property), for CTZ -1 and
> bitsize).  If there isn't an optab for it, for CLZ it still assumes
> it might be bitsize, for CTZ it just assumes it is undefined behavior
> otherwise, because if I understand the code right, for CTZ we really could
> return various values for 0 without hw support for the mode, e.g. when
> CTZ is implemented using CLZ, it might return something, if we use wider
> mode hw CTZ and it would return bitsize, that would be bitsize of the wider
> mode etc.  I bet longlong.h only uses __builtin_c?z builtins for modes
> actually implemented in hw anyway (otherwise it couldn't be used safely in
> libgcc implementation of those libcalls).
>
> Both patches have been bootstrapped/regtested on x86_64-linux and
> i686-linux, which one do you prefer?
>
> 	Jakub
>
>

On ARM, CLZ has a defined result at zero (32).  Furthermore, the ACLE 
specification defines (in the header arm_acle.h)  __clz(n) as an 
intrinsic aimed at the CLZ instruction; __clz() has a defined result at 
0.  We want to use __builtin_clz as the implementation for __clz rather 
than inventing another one; but that would require the compiler to 
handle zero correctly.

R.
Jakub Jelinek - July 9, 2013, 9:35 a.m.
On Tue, Jul 09, 2013 at 10:20:18AM +0100, Richard Earnshaw wrote:
> On ARM, CLZ has a defined result at zero (32).  Furthermore, the
> ACLE specification defines (in the header arm_acle.h)  __clz(n) as
> an intrinsic aimed at the CLZ instruction; __clz() has a defined
> result at 0.  We want to use __builtin_clz as the implementation for
> __clz rather than inventing another one; but that would require the
> compiler to handle zero correctly.

The patch that has been committed is the conservative one, so it
handles any __builtin_clz{,l,ll,imax} (0) returning the mode bitsize
(because that is pretty much the only value used for 0 by targets if
they specify it).  __builtin_ctz{,l,ll,imax} (0) is a different matter,
because the value at 0 varries a lot (-1, mode bitsize, undefined)
for the cases where there is optab, and for the case where __builtin_ctz
needs to be implemented say using clz, or in wider mode, or through library
routines you really can't expect anything meaningful.
So, for CTZ you get the target defined value for 0 if there is one only
if you have a ctz optab for that mode and CTZ_DEFINED_VALUE_AT_ZERO,
otherwise the patch treats it as undefined.

	Jakub
Joseph S. Myers - July 9, 2013, 3:47 p.m.
On Tue, 9 Jul 2013, Richard Earnshaw wrote:

> On ARM, CLZ has a defined result at zero (32).  Furthermore, the ACLE
> specification defines (in the header arm_acle.h)  __clz(n) as an intrinsic
> aimed at the CLZ instruction; __clz() has a defined result at 0.  We want to
> use __builtin_clz as the implementation for __clz rather than inventing
> another one; but that would require the compiler to handle zero correctly.

Assuming the header will come with GCC, it can assume semantics we don't 
document for user code - such as that __builtin_clz is defined at 0, if 
that is the case with a given GCC version and target architecture.  If the 
semantics change, the header can then be changed at the same time.

(I'd still encourage user code wanting a defined value at 0 to do e.g.

static inline int clz (int n) { return n == 0 ? 32 : __builtin_clz (n); }

and hope GCC will optimize away the test for 0 when the instruction 
semantics make it unnecessary - if it doesn't, it should be fixed to do 
so.  And it's certainly fine to put such code in an intrinsic header if 
useful.)

Patch

--- gcc/fold-const.c.jj	2013-06-19 19:28:32.000000000 +0200
+++ gcc/fold-const.c	2013-07-04 12:13:28.153901855 +0200
@@ -15606,6 +15606,9 @@  tree_call_nonnegative_warnv_p (tree type
 	CASE_INT_FN (BUILT_IN_FFS):
 	CASE_INT_FN (BUILT_IN_PARITY):
 	CASE_INT_FN (BUILT_IN_POPCOUNT):
+	CASE_INT_FN (BUILT_IN_CLZ):
+	CASE_INT_FN (BUILT_IN_CTZ):
+	CASE_INT_FN (BUILT_IN_CLRSB):
       case BUILT_IN_BSWAP32:
       case BUILT_IN_BSWAP64:
 	/* Always true.  */
--- gcc/tree.h.jj	2013-06-23 20:43:31.000000000 +0200
+++ gcc/tree.h	2013-07-04 11:42:42.403530159 +0200
@@ -322,7 +322,7 @@  extern const char * built_in_names[(int)
 
 #define CASE_FLT_FN(FN) case FN: case FN##F: case FN##L
 #define CASE_FLT_FN_REENT(FN) case FN##_R: case FN##F_R: case FN##L_R
-#define CASE_INT_FN(FN) case FN: case FN##L: case FN##LL
+#define CASE_INT_FN(FN) case FN: case FN##L: case FN##LL: case FN##IMAX
 
 /* In an OMP_CLAUSE node.  */
 
--- gcc/tree-vrp.c.jj	2013-05-24 20:07:40.000000000 +0200
+++ gcc/tree-vrp.c	2013-07-04 13:24:05.044607676 +0200
@@ -3565,20 +3565,130 @@  extract_range_basic (value_range_t *vr,
   bool sop = false;
   tree type = gimple_expr_type (stmt);
 
-  /* If the call is __builtin_constant_p and the argument is a
-     function parameter resolve it to false.  This avoids bogus
-     array bound warnings.
-     ???  We could do this as early as inlining is finished.  */
-  if (gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P))
+  if (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
     {
-      tree arg = gimple_call_arg (stmt, 0);
-      if (TREE_CODE (arg) == SSA_NAME
-	  && SSA_NAME_IS_DEFAULT_DEF (arg)
-	  && TREE_CODE (SSA_NAME_VAR (arg)) == PARM_DECL)
-	set_value_range_to_null (vr, type);
+      tree fndecl = gimple_call_fndecl (stmt), arg;
+      int mini, maxi, prec;
+
+      switch (DECL_FUNCTION_CODE (fndecl))
+	{
+	case BUILT_IN_CONSTANT_P:
+	  /* If the call is __builtin_constant_p and the argument is a
+	     function parameter resolve it to false.  This avoids bogus
+	     array bound warnings.
+	     ???  We could do this as early as inlining is finished.  */
+	  arg = gimple_call_arg (stmt, 0);
+	  if (TREE_CODE (arg) == SSA_NAME
+	      && SSA_NAME_IS_DEFAULT_DEF (arg)
+	      && TREE_CODE (SSA_NAME_VAR (arg)) == PARM_DECL)
+	    {
+	      set_value_range_to_null (vr, type);
+	      return;
+	    }
+	  break;
+	  /* Both __builtin_ffs* and __builtin_popcount return
+	     [0, prec].  */
+	CASE_INT_FN (BUILT_IN_FFS):
+	CASE_INT_FN (BUILT_IN_POPCOUNT):
+	  arg = gimple_call_arg (stmt, 0);
+	  prec = TYPE_PRECISION (TREE_TYPE (arg));
+	  mini = 0;
+	  maxi = prec;
+	  if (TREE_CODE (arg) == SSA_NAME)
+	    {
+	      value_range_t *vr0 = get_value_range (arg);
+	      /* If arg is non-zero, then ffs or popcount
+		 are non-zero.  */
+	      if (((vr0->type == VR_RANGE
+		    && integer_nonzerop (vr0->min))
+		   || (vr0->type == VR_ANTI_RANGE
+		       && integer_zerop (vr0->min)))
+		  && !TREE_OVERFLOW (vr0->min))
+		mini = 1;
+	      /* If some high bits are known to be zero,
+		 we can decrease the maximum.  */
+	      if (vr0->type == VR_RANGE
+		  && TREE_CODE (vr0->max) == INTEGER_CST
+		  && !TREE_OVERFLOW (vr0->max))
+		maxi = tree_floor_log2 (vr0->max) + 1;
+	    }
+	  goto bitop_builtin;
+	  /* __builtin_parity* returns [0, 1].  */
+	CASE_INT_FN (BUILT_IN_PARITY):
+	  mini = 0;
+	  maxi = 1;
+	  goto bitop_builtin;
+	  /* __builtin_c[lt]z* return [0, prec-1], except for
+	     when the argument is 0, but that is undefined behavior.  */
+	CASE_INT_FN (BUILT_IN_CLZ):
+	  arg = gimple_call_arg (stmt, 0);
+	  prec = TYPE_PRECISION (TREE_TYPE (arg));
+	  mini = 0;
+	  maxi = prec - 1;
+	  if (TREE_CODE (arg) == SSA_NAME)
+	    {
+	      value_range_t *vr0 = get_value_range (arg);
+	      /* From clz of VR_RANGE minimum we can compute
+		 result maximum.  */
+	      if (vr0->type == VR_RANGE
+		  && TREE_CODE (vr0->min) == INTEGER_CST
+		  && !TREE_OVERFLOW (vr0->min))
+		{
+		  maxi -= tree_floor_log2 (vr0->min);
+		  if (maxi == prec)
+		    maxi = prec - 1;
+		}
+	      /* From clz of VR_RANGE maximum we can compute
+		 result minimum.  */
+	      if (vr0->type == VR_RANGE
+		  && TREE_CODE (vr0->max) == INTEGER_CST
+		  && !TREE_OVERFLOW (vr0->max))
+		{
+		  mini = prec - 1 - tree_floor_log2 (vr0->max);
+		  if (mini == prec)
+		    break;
+		}
+	    }
+	  goto bitop_builtin;
+	  /* __builtin_ctz* return [0, prec-1], except for
+	     when the argument is 0, but that is undefined behavior.  */
+	CASE_INT_FN (BUILT_IN_CTZ):
+	  arg = gimple_call_arg (stmt, 0);
+	  prec = TYPE_PRECISION (TREE_TYPE (arg));
+	  mini = 0;
+	  maxi = prec - 1;
+	  if (TREE_CODE (arg) == SSA_NAME)
+	    {
+	      value_range_t *vr0 = get_value_range (arg);
+	      /* If some high bits are known to be zero,
+		 we can decrease the result maximum.  */
+	      if (vr0->type == VR_RANGE
+		  && TREE_CODE (vr0->max) == INTEGER_CST
+		  && !TREE_OVERFLOW (vr0->max))
+		{
+		  maxi = tree_floor_log2 (vr0->max);
+		  if (maxi == -1)
+		    break;
+		}
+	    }
+	  goto bitop_builtin;
+	  /* __builtin_clrsb* returns [0, prec-1].  */
+	CASE_INT_FN (BUILT_IN_CLRSB):
+	  arg = gimple_call_arg (stmt, 0);
+	  prec = TYPE_PRECISION (TREE_TYPE (arg));
+	  mini = 0;
+	  maxi = prec - 1;
+	  goto bitop_builtin;
+	bitop_builtin:
+	  set_value_range (vr, VR_RANGE, build_int_cst (type, mini),
+			   build_int_cst (type, maxi), NULL);
+	  return;
+	default:
+	  break;
+	}
     }
-  else if (INTEGRAL_TYPE_P (type)
-	   && gimple_stmt_nonnegative_warnv_p (stmt, &sop))
+  if (INTEGRAL_TYPE_P (type)
+      && gimple_stmt_nonnegative_warnv_p (stmt, &sop))
     set_value_range_to_nonnegative (vr, type,
 				    sop || stmt_overflow_infinity (stmt));
   else if (vrp_stmt_computes_nonzero (stmt, &sop)
--- gcc/builtins.c.jj	2013-07-03 22:15:05.000000000 +0200
+++ gcc/builtins.c	2013-07-04 11:43:31.801693566 +0200
@@ -6107,7 +6107,6 @@  expand_builtin (tree exp, rtx target, rt
       break;
 
     CASE_INT_FN (BUILT_IN_FFS):
-    case BUILT_IN_FFSIMAX:
       target = expand_builtin_unop (target_mode, exp, target,
 				    subtarget, ffs_optab);
       if (target)
@@ -6115,7 +6114,6 @@  expand_builtin (tree exp, rtx target, rt
       break;
 
     CASE_INT_FN (BUILT_IN_CLZ):
-    case BUILT_IN_CLZIMAX:
       target = expand_builtin_unop (target_mode, exp, target,
 				    subtarget, clz_optab);
       if (target)
@@ -6123,7 +6121,6 @@  expand_builtin (tree exp, rtx target, rt
       break;
 
     CASE_INT_FN (BUILT_IN_CTZ):
-    case BUILT_IN_CTZIMAX:
       target = expand_builtin_unop (target_mode, exp, target,
 				    subtarget, ctz_optab);
       if (target)
@@ -6131,7 +6128,6 @@  expand_builtin (tree exp, rtx target, rt
       break;
 
     CASE_INT_FN (BUILT_IN_CLRSB):
-    case BUILT_IN_CLRSBIMAX:
       target = expand_builtin_unop (target_mode, exp, target,
 				    subtarget, clrsb_optab);
       if (target)
@@ -6139,7 +6135,6 @@  expand_builtin (tree exp, rtx target, rt
       break;
 
     CASE_INT_FN (BUILT_IN_POPCOUNT):
-    case BUILT_IN_POPCOUNTIMAX:
       target = expand_builtin_unop (target_mode, exp, target,
 				    subtarget, popcount_optab);
       if (target)
@@ -6147,7 +6142,6 @@  expand_builtin (tree exp, rtx target, rt
       break;
 
     CASE_INT_FN (BUILT_IN_PARITY):
-    case BUILT_IN_PARITYIMAX:
       target = expand_builtin_unop (target_mode, exp, target,
 				    subtarget, parity_optab);
       if (target)
@@ -8152,6 +8146,8 @@  fold_builtin_bitop (tree fndecl, tree ar
 	  break;
 
 	CASE_INT_FN (BUILT_IN_CLRSB):
+	  if (width > 2 * HOST_BITS_PER_WIDE_INT)
+	    return NULL_TREE;
 	  if (width > HOST_BITS_PER_WIDE_INT
 	      && (hi & ((unsigned HOST_WIDE_INT) 1
 			<< (width - HOST_BITS_PER_WIDE_INT - 1))) != 0)
--- libgcc/libgcc2.c.jj	2013-07-03 13:01:15.000000000 +0200
+++ libgcc/libgcc2.c	2013-07-05 08:54:42.080525408 +0200
@@ -1571,7 +1571,7 @@  FUNC (DWtype u)
   /* Otherwise, find the power of two.  */
   Wtype hi = u >> W_TYPE_SIZE;
   if (hi < 0)
-    hi = -hi;
+    hi = -(UWtype) hi;
 
   UWtype count, shift;
   count_leading_zeros (count, hi);
--- gcc/testsuite/gcc.dg/tree-ssa/vrp89.c.jj	2013-07-04 12:19:17.271993105 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/vrp89.c	2013-07-04 13:44:48.000000000 +0200
@@ -0,0 +1,57 @@ 
+/* PR target/29776 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-vrp1" } */
+/* { dg-final { scan-tree-dump-not "link_error" "vrp1"} } */
+/* { dg-final { cleanup-tree-dump "vrp1" } } */
+
+#define A(fn, arg, min, max) \
+  if (__builtin_##fn (arg) < min || __builtin_##fn (arg) > max) \
+    link_error ();
+#define B(fn, min, max) \
+  A (fn, a, min, max) A (fn##l, b, min, max) A (fn##ll, c, min, max)
+#define C(fn, sub) \
+  A (fn, a, 0, ((int) sizeof (a) * __CHAR_BIT__ - sub)) \
+  A (fn##l, b, 0, ((int) sizeof (b) * __CHAR_BIT__ - sub)) \
+  A (fn##ll, c, 0, ((int) sizeof (c) * __CHAR_BIT__ - sub))
+#define D(fn, sub1, sub2) \
+  A (fn, a, ((int) sizeof (a) * __CHAR_BIT__ - sub1), \
+     ((int) sizeof (a) * __CHAR_BIT__ - sub2)) \
+  A (fn##l, b, ((int) sizeof (b) * __CHAR_BIT__ - sub1), \
+     ((int) sizeof (b) * __CHAR_BIT__ - sub2)) \
+  A (fn##ll, c, ((int) sizeof (c) * __CHAR_BIT__ - sub1), \
+     ((int) sizeof (c) * __CHAR_BIT__ - sub2))
+
+extern void link_error (void);
+
+unsigned int d;
+unsigned long e;
+unsigned long long f;
+
+void
+foo (unsigned int a, unsigned long b, unsigned long long c)
+{
+  B (parity, 0, 1)
+  C (ffs, 0)
+  C (popcount, 0)
+  C (clz, 1)
+  C (ctz, 1)
+  a &= 63;
+  b &= 63;
+  c &= 63;
+  B (ffs, 0, 6)
+  B (popcount, 0, 6)
+  a += 3; b += 3; c += 3;
+  B (ffs, 1, 7)
+  B (popcount, 1, 7)
+  a = 32U + (d & 1023U);
+  b = 32UL + (e & 1023UL);
+  c = 32ULL + (f & 1023ULL);
+  D (clz, 11, 6)
+  B (ctz, 0, 10)
+}
+
+void
+bar (int a, long b, long long c)
+{
+  C (clrsb, 1)
+}