Patchwork Fix __builtin_clrsb* (PR middle-end/49489)

login
register
mail settings
Submitter Jakub Jelinek
Date June 21, 2011, 4:10 p.m.
Message ID <20110621161058.GJ16443@tyan-ft48-01.lab.bos.redhat.com>
Download mbox | patch
Permalink /patch/101308/
State New
Headers show

Comments

Jakub Jelinek - June 21, 2011, 4:10 p.m.
On Mon, Jun 20, 2011 at 09:38:22PM +0200, Bernd Schmidt wrote:
> On 06/16/2011 06:25 PM, Richard Henderson wrote:
> > On 06/16/2011 05:44 AM, Bernd Schmidt wrote:
> >> +@deftypefn {Built-in Function} int __builtin_clrsb (unsigned int x)
> >> +Returns the number of leading redundant sign bits in @var{x}, starting
> >> +at the most significant bit position.
> >> +@end deftypefn
> > 
> > Do we want a signed argument, since we're talking about signs?
> 
> Err, yes. It's signed everywhere else (builtins.def etc.).

But unfortunately not during expansion.
gcc.c-torture/execute/builtin-bitops-1.c fails on x86_64-linux, because
there is no SImode libcall, only DImode, and it was zero-extending instead
of sign-extending the argument, and not subtracting the mode difference
afterwards.  So int foo (int x) { return __builtin_clrsb (x); }
expanded the same as
int foo (int x) { return __builtin_clrsbl ((unsigned) x); }
instead of the correct
int foo (int x) { return __builtin_clrsbl (x) - 32; }

Fixed thusly, I've also added folding of constant arguments
during tree optimizations for it.

Ok for trunk?

2011-06-21  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/49489
	* builtins.c (expand_builtin_unop): Call expand_unop with 0 as
	unsignedp argument instead of 1 for clrsb_optab.
	(fold_builtin_bitop): Fix masking for width > HOST_BITS_PER_WIDE_INT
	and < 2 * HOST_BITS_PER_WIDE_INT.  Optimize BUILT_IN_CLRSB*.
	(fold_builtin_1): Call fold_builtin_bitop for BUILT_IN_CLRSB*.
	* optabs.c (widen_leading): Call widen_operand and expand_unop
	with 0 as unsignedp argument instead of 1 for clrsb_optab.
	(expand_unop): Subtract difference of mode sizes also for
	clrsb_optab.


	Jakub
Bernd Schmidt - June 21, 2011, 4:15 p.m.
On 06/21/2011 06:10 PM, Jakub Jelinek wrote:
> gcc.c-torture/execute/builtin-bitops-1.c fails on x86_64-linux, because
> there is no SImode libcall, only DImode, and it was zero-extending instead
> of sign-extending the argument, and not subtracting the mode difference
> afterwards.  So int foo (int x) { return __builtin_clrsb (x); }
> expanded the same as
> int foo (int x) { return __builtin_clrsbl ((unsigned) x); }
> instead of the correct
> int foo (int x) { return __builtin_clrsbl (x) - 32; }
> 
> Fixed thusly, I've also added folding of constant arguments
> during tree optimizations for it.
> 
> Ok for trunk?

Ok.

> 	(fold_builtin_bitop): Fix masking for width > HOST_BITS_PER_WIDE_INT
> 	and < 2 * HOST_BITS_PER_WIDE_INT.  Optimize BUILT_IN_CLRSB*.

Oh that's where the folders were hiding? I thought we didn't have any
for this family of builtins.


Bernd

Patch

--- gcc/builtins.c.jj	2011-06-21 16:46:01.000000000 +0200
+++ gcc/builtins.c	2011-06-21 17:36:38.000000000 +0200
@@ -4578,7 +4578,7 @@  expand_builtin_unop (enum machine_mode t
   /* Compute op, into TARGET if possible.
      Set TARGET to wherever the result comes back.  */
   target = expand_unop (TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (exp, 0))),
-			op_optab, op0, target, 1);
+			op_optab, op0, target, op_optab != clrsb_optab);
   gcc_assert (target);
 
   return convert_to_mode (target_mode, target, 0);
@@ -7265,7 +7265,8 @@  fold_builtin_bitop (tree fndecl, tree ar
 	{
 	  hi = TREE_INT_CST_HIGH (arg);
 	  if (width < 2 * HOST_BITS_PER_WIDE_INT)
-	    hi &= ~((HOST_WIDE_INT) (-1) >> (width - HOST_BITS_PER_WIDE_INT));
+	    hi &= ~((unsigned HOST_WIDE_INT) (-1)
+		    << (width - HOST_BITS_PER_WIDE_INT));
 	}
       else
 	{
@@ -7303,6 +7304,26 @@  fold_builtin_bitop (tree fndecl, tree ar
 	    result = width;
 	  break;
 
+	CASE_INT_FN (BUILT_IN_CLRSB):
+	  if (width > HOST_BITS_PER_WIDE_INT
+	      && (hi & ((unsigned HOST_WIDE_INT) 1
+			<< (width - HOST_BITS_PER_WIDE_INT - 1))) != 0)
+	    {
+	      hi = ~hi & ~((unsigned HOST_WIDE_INT) (-1)
+			   << (width - HOST_BITS_PER_WIDE_INT - 1));
+	      lo = ~lo;
+	    }
+	  else if (width <= HOST_BITS_PER_WIDE_INT
+		   && (lo & ((unsigned HOST_WIDE_INT) 1 << (width - 1))) != 0)
+	    lo = ~lo & ~((unsigned HOST_WIDE_INT) (-1) << (width - 1));
+	  if (hi != 0)
+	    result = width - floor_log2 (hi) - 2 - HOST_BITS_PER_WIDE_INT;
+	  else if (lo != 0)
+	    result = width - floor_log2 (lo) - 2;
+	  else
+	    result = width - 1;
+	  break;
+
 	CASE_INT_FN (BUILT_IN_POPCOUNT):
 	  result = 0;
 	  while (lo)
@@ -9737,6 +9758,7 @@  fold_builtin_1 (location_t loc, tree fnd
     CASE_INT_FN (BUILT_IN_FFS):
     CASE_INT_FN (BUILT_IN_CLZ):
     CASE_INT_FN (BUILT_IN_CTZ):
+    CASE_INT_FN (BUILT_IN_CLRSB):
     CASE_INT_FN (BUILT_IN_POPCOUNT):
     CASE_INT_FN (BUILT_IN_PARITY):
       return fold_builtin_bitop (fndecl, arg0);
--- gcc/optabs.c.jj	2011-06-21 16:46:01.000000000 +0200
+++ gcc/optabs.c	2011-06-21 17:50:35.000000000 +0200
@@ -2350,8 +2350,10 @@  widen_leading (enum machine_mode mode, r
 
 	      if (target == 0)
 		target = gen_reg_rtx (mode);
-	      xop0 = widen_operand (op0, wider_mode, mode, true, false);
-	      temp = expand_unop (wider_mode, unoptab, xop0, NULL_RTX, true);
+	      xop0 = widen_operand (op0, wider_mode, mode,
+				    unoptab != clrsb_optab, false);
+	      temp = expand_unop (wider_mode, unoptab, xop0, NULL_RTX,
+				  unoptab != clrsb_optab);
 	      if (temp != 0)
 		temp = expand_binop (wider_mode, sub_optab, temp,
 				     GEN_INT (GET_MODE_BITSIZE (wider_mode)
@@ -3075,8 +3077,9 @@  expand_unop (enum machine_mode mode, opt
 				  unsignedp);
 
 	      /* If we are generating clz using wider mode, adjust the
-		 result.  */
-	      if (unoptab == clz_optab && temp != 0)
+		 result.  Similarly for clrsb.  */
+	      if ((unoptab == clz_optab || unoptab == clrsb_optab)
+		  && temp != 0)
 		temp = expand_binop (wider_mode, sub_optab, temp,
 				     GEN_INT (GET_MODE_BITSIZE (wider_mode)
 					      - GET_MODE_BITSIZE (mode)),