Patchwork [committed] Fix expand_mult (PR middle-end/56420)

login
register
mail settings
Submitter Mike Stump
Date Aug. 19, 2013, 10:57 p.m.
Message ID <5E763C00-A094-4E46-8BDE-BC24FE52C1D1@comcast.net>
Download mbox | patch
Permalink /patch/268323/
State New
Headers show

Comments

Mike Stump - Aug. 19, 2013, 10:57 p.m.
On Feb 21, 2013, at 1:35 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> I've committed the following fix for the following testcase.
> When scalar_op1 is 0xffffffffffffffff8000000000000000 with 64-bit HWI,
> it matches EXACT_POWER_OF_2_OR_ZERO_P, but we should expand it as
> negation of the << 63 shift rather than the << 63 shift alone.
> 
> The patch also improves multiplication by 0x8000000000000000
> in TImode, which can be done as << 63 shift, and multiplication by
> -(((TImode)1) << N) for N 0 through 62, which can be also expanded as
> negation of left shift.

So, I noticed that I also need something like:



to not have this test case fail on me.  I widen to a larger size and then compare.  The problem is the code just after this don't handle widening, only narrowing.  Since the constants have no modes and since we want identity, we can just return x directly.

Thoughts?

> Furthermore I've noticed several places where we could invoke signed integer
> overflows inside of the compiler and fixed them.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, acked by Richard
> on IRC, committed to trunk.
> 
> 2013-02-21  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR middle-end/56420
> 	* expmed.c (EXACT_POWER_OF_2_OR_ZERO_P): Do subtraction in uhwi, to
> 	avoid signed wrapping.
> 	(expand_mult): Handle properly multiplication by
> 	((dword_type) -1) << (BITS_PER_WORD - 1).  Improve multiplication by
> 	((dword_type) 1) << (BITS_PER_WORD - 1).  Avoid undefined behavior
> 	in the compiler if coeff is HOST_WIDE_INT_MIN.
> 	(expand_divmod): Don't make ext_op1 static, change it's type to
> 	uhwi.  Avoid undefined behavior in -INTVAL (op1).
> 
> 	* gcc.dg/torture/pr56420.c: New test.
> 
> --- gcc/expmed.c.jj	2013-02-13 21:46:52.000000000 +0100
> +++ gcc/expmed.c	2013-02-21 19:25:05.692298011 +0100
> @@ -64,7 +64,8 @@ static rtx expand_smod_pow2 (enum machin
> static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
> 
> /* Test whether a value is zero of a power of two.  */
> -#define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
> +#define EXACT_POWER_OF_2_OR_ZERO_P(x) \
> +  (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
> 
> struct init_expmed_rtl
> {
> @@ -3079,7 +3080,10 @@ expand_mult (enum machine_mode mode, rtx
> 	  /* If we are multiplying in DImode, it may still be a win
> 	     to try to work with shifts and adds.  */
> 	  if (CONST_DOUBLE_HIGH (scalar_op1) == 0
> -	      && CONST_DOUBLE_LOW (scalar_op1) > 0)
> +	      && (CONST_DOUBLE_LOW (scalar_op1) > 0
> +		  || (CONST_DOUBLE_LOW (scalar_op1) < 0
> +		      && EXACT_POWER_OF_2_OR_ZERO_P
> +			   (CONST_DOUBLE_LOW (scalar_op1)))))
> 	    {
> 	      coeff = CONST_DOUBLE_LOW (scalar_op1);
> 	      is_neg = false;
> @@ -3109,7 +3113,8 @@ expand_mult (enum machine_mode mode, rtx
> 	 use synth_mult.  */
> 
>       /* Special case powers of two.  */
> -      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
> +      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
> +	  && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
> 	return expand_shift (LSHIFT_EXPR, mode, op0,
> 			     floor_log2 (coeff), target, unsignedp);
> 
> @@ -3124,13 +3129,24 @@ expand_mult (enum machine_mode mode, rtx
> 	     result is interpreted as an unsigned coefficient.
> 	     Exclude cost of op0 from max_cost to match the cost
> 	     calculation of the synth_mult.  */
> +	  coeff = -(unsigned HOST_WIDE_INT) coeff;
> 	  max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
> 		      - neg_cost(speed, mode));
> -	  if (max_cost > 0
> -	      && choose_mult_variant (mode, -coeff, &algorithm,
> -				      &variant, max_cost))
> +	  if (max_cost <= 0)
> +	    goto skip_synth;
> +
> +	  /* Special case powers of two.  */
> +	  if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
> +	    {
> +	      rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
> +				       floor_log2 (coeff), target, unsignedp);
> +	      return expand_unop (mode, neg_optab, temp, target, 0);
> +	    }
> +
> +	  if (choose_mult_variant (mode, coeff, &algorithm, &variant,
> +				   max_cost))
> 	    {
> -	      rtx temp = expand_mult_const (mode, op0, -coeff, NULL_RTX,
> +	      rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
> 					    &algorithm, variant);
> 	      return expand_unop (mode, neg_optab, temp, target, 0);
> 	    }
> @@ -3813,13 +3829,12 @@ expand_divmod (int rem_flag, enum tree_c
>   int op1_is_constant, op1_is_pow2 = 0;
>   int max_cost, extra_cost;
>   static HOST_WIDE_INT last_div_const = 0;
> -  static HOST_WIDE_INT ext_op1;
>   bool speed = optimize_insn_for_speed_p ();
> 
>   op1_is_constant = CONST_INT_P (op1);
>   if (op1_is_constant)
>     {
> -      ext_op1 = INTVAL (op1);
> +      unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
>       if (unsignedp)
> 	ext_op1 &= GET_MODE_MASK (mode);
>       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
> @@ -3967,7 +3982,7 @@ expand_divmod (int rem_flag, enum tree_c
>       op1_is_pow2 = (op1_is_constant
> 		     && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
> 			  || (! unsignedp
> -			      && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
> +			      && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
>     }
> 
>   /* If one of the operands is a volatile MEM, copy it into a register.  */
> --- gcc/testsuite/gcc.dg/torture/pr56420.c.jj	2013-02-21 18:37:54.720565659 +0100
> +++ gcc/testsuite/gcc.dg/torture/pr56420.c	2013-02-21 19:08:41.000000000 +0100
> @@ -0,0 +1,37 @@
> +/* PR middle-end/56420 */
> +/* { dg-do run { target int128 } } */
> +
> +extern void abort (void);
> +
> +__attribute__((noinline, noclone)) __uint128_t
> +foo (__uint128_t x)
> +{
> +  return x * (((__uint128_t) -1) << 63);
> +}
> +
> +__attribute__((noinline, noclone)) __uint128_t
> +bar (__uint128_t x)
> +{
> +  return x * (((__uint128_t) 1) << 63);
> +}
> +
> +__attribute__((noinline, noclone)) __uint128_t
> +baz (__uint128_t x)
> +{
> +  return x * -(((__uint128_t) 1) << 62);
> +}
> +
> +int
> +main ()
> +{
> +  if (foo (1) != (((__uint128_t) -1) << 63)
> +      || foo (8) != (((__uint128_t) -1) << 66))
> +    abort ();
> +  if (bar (1) != (((__uint128_t) 1) << 63)
> +      || bar (8) != (((__uint128_t) 1) << 66))
> +    abort ();
> +  if (baz (1) != -(((__uint128_t) 1) << 62)
> +      || baz (8) != ((-(((__uint128_t) 1) << 62)) << 3))
> +    abort ();
> +  return 0;
> +}
> 
> 	Jakub

Patch

diff --git a/gcc/expr.c b/gcc/expr.c
index ea3ff90..0253c0a 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -729,6 +729,11 @@  convert_modes (enum machine_mode mode, enum machine_mode oldmode, rtx x, int uns
       return immed_double_int_const (val, mode);
     }
 
+  if (GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (oldmode)
+      && GET_MODE_CLASS (mode) == MODE_INT
+      && CONST_SCALAR_INT_P (x))
+    return x;
+
   /* We can do this with a gen_lowpart if both desired and current modes
      are integer, and this is either a constant integer, a register, or a
      non-volatile MEM.  Except for the constant case where MODE is no