Patchwork [committed] Fix expand_mult (PR middle-end/56420)

login
register
mail settings
Submitter Jakub Jelinek
Date Feb. 21, 2013, 9:35 p.m.
Message ID <20130221213514.GH1215@tucnak.zalov.cz>
Download mbox | patch
Permalink /patch/222419/
State New
Headers show

Comments

Jakub Jelinek - Feb. 21, 2013, 9:35 p.m.
Hi!

I've committed the following fix for the following testcase.
When scalar_op1 is 0xffffffffffffffff8000000000000000 with 64-bit HWI,
it matches EXACT_POWER_OF_2_OR_ZERO_P, but we should expand it as
negation of the << 63 shift rather than the << 63 shift alone.

The patch also improves multiplication by 0x8000000000000000
in TImode, which can be done as << 63 shift, and multiplication by
-(((TImode)1) << N) for N 0 through 62, which can be also expanded as
negation of left shift.

Furthermore I've noticed several places where we could invoke signed integer
overflows inside of the compiler and fixed them.

Bootstrapped/regtested on x86_64-linux and i686-linux, acked by Richard
on IRC, committed to trunk.

2013-02-21  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/56420
	* expmed.c (EXACT_POWER_OF_2_OR_ZERO_P): Do subtraction in uhwi, to
	avoid signed wrapping.
	(expand_mult): Handle properly multiplication by
	((dword_type) -1) << (BITS_PER_WORD - 1).  Improve multiplication by
	((dword_type) 1) << (BITS_PER_WORD - 1).  Avoid undefined behavior
	in the compiler if coeff is HOST_WIDE_INT_MIN.
	(expand_divmod): Don't make ext_op1 static, change it's type to
	uhwi.  Avoid undefined behavior in -INTVAL (op1).

	* gcc.dg/torture/pr56420.c: New test.


	Jakub

Patch

--- gcc/expmed.c.jj	2013-02-13 21:46:52.000000000 +0100
+++ gcc/expmed.c	2013-02-21 19:25:05.692298011 +0100
@@ -64,7 +64,8 @@  static rtx expand_smod_pow2 (enum machin
 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
 
 /* Test whether a value is zero of a power of two.  */
-#define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
+#define EXACT_POWER_OF_2_OR_ZERO_P(x) \
+  (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
 
 struct init_expmed_rtl
 {
@@ -3079,7 +3080,10 @@  expand_mult (enum machine_mode mode, rtx
 	  /* If we are multiplying in DImode, it may still be a win
 	     to try to work with shifts and adds.  */
 	  if (CONST_DOUBLE_HIGH (scalar_op1) == 0
-	      && CONST_DOUBLE_LOW (scalar_op1) > 0)
+	      && (CONST_DOUBLE_LOW (scalar_op1) > 0
+		  || (CONST_DOUBLE_LOW (scalar_op1) < 0
+		      && EXACT_POWER_OF_2_OR_ZERO_P
+			   (CONST_DOUBLE_LOW (scalar_op1)))))
 	    {
 	      coeff = CONST_DOUBLE_LOW (scalar_op1);
 	      is_neg = false;
@@ -3109,7 +3113,8 @@  expand_mult (enum machine_mode mode, rtx
 	 use synth_mult.  */
 
       /* Special case powers of two.  */
-      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
+      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
+	  && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
 	return expand_shift (LSHIFT_EXPR, mode, op0,
 			     floor_log2 (coeff), target, unsignedp);
 
@@ -3124,13 +3129,24 @@  expand_mult (enum machine_mode mode, rtx
 	     result is interpreted as an unsigned coefficient.
 	     Exclude cost of op0 from max_cost to match the cost
 	     calculation of the synth_mult.  */
+	  coeff = -(unsigned HOST_WIDE_INT) coeff;
 	  max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
 		      - neg_cost(speed, mode));
-	  if (max_cost > 0
-	      && choose_mult_variant (mode, -coeff, &algorithm,
-				      &variant, max_cost))
+	  if (max_cost <= 0)
+	    goto skip_synth;
+
+	  /* Special case powers of two.  */
+	  if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
+	    {
+	      rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
+				       floor_log2 (coeff), target, unsignedp);
+	      return expand_unop (mode, neg_optab, temp, target, 0);
+	    }
+
+	  if (choose_mult_variant (mode, coeff, &algorithm, &variant,
+				   max_cost))
 	    {
-	      rtx temp = expand_mult_const (mode, op0, -coeff, NULL_RTX,
+	      rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
 					    &algorithm, variant);
 	      return expand_unop (mode, neg_optab, temp, target, 0);
 	    }
@@ -3813,13 +3829,12 @@  expand_divmod (int rem_flag, enum tree_c
   int op1_is_constant, op1_is_pow2 = 0;
   int max_cost, extra_cost;
   static HOST_WIDE_INT last_div_const = 0;
-  static HOST_WIDE_INT ext_op1;
   bool speed = optimize_insn_for_speed_p ();
 
   op1_is_constant = CONST_INT_P (op1);
   if (op1_is_constant)
     {
-      ext_op1 = INTVAL (op1);
+      unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
       if (unsignedp)
 	ext_op1 &= GET_MODE_MASK (mode);
       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
@@ -3967,7 +3982,7 @@  expand_divmod (int rem_flag, enum tree_c
       op1_is_pow2 = (op1_is_constant
 		     && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
 			  || (! unsignedp
-			      && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
+			      && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
     }
 
   /* If one of the operands is a volatile MEM, copy it into a register.  */
--- gcc/testsuite/gcc.dg/torture/pr56420.c.jj	2013-02-21 18:37:54.720565659 +0100
+++ gcc/testsuite/gcc.dg/torture/pr56420.c	2013-02-21 19:08:41.000000000 +0100
@@ -0,0 +1,37 @@ 
+/* PR middle-end/56420 */
+/* { dg-do run { target int128 } } */
+
+extern void abort (void);
+
+__attribute__((noinline, noclone)) __uint128_t
+foo (__uint128_t x)
+{
+  return x * (((__uint128_t) -1) << 63);
+}
+
+__attribute__((noinline, noclone)) __uint128_t
+bar (__uint128_t x)
+{
+  return x * (((__uint128_t) 1) << 63);
+}
+
+__attribute__((noinline, noclone)) __uint128_t
+baz (__uint128_t x)
+{
+  return x * -(((__uint128_t) 1) << 62);
+}
+
+int
+main ()
+{
+  if (foo (1) != (((__uint128_t) -1) << 63)
+      || foo (8) != (((__uint128_t) -1) << 66))
+    abort ();
+  if (bar (1) != (((__uint128_t) 1) << 63)
+      || bar (8) != (((__uint128_t) 1) << 66))
+    abort ();
+  if (baz (1) != -(((__uint128_t) 1) << 62)
+      || baz (8) != ((-(((__uint128_t) 1) << 62)) << 3))
+    abort ();
+  return 0;
+}