Patchwork Improve btc (PR target/57819)

login
register
mail settings
Submitter Jakub Jelinek
Date July 5, 2013, 3:15 p.m.
Message ID <20130705151551.GX2336@tucnak.redhat.com>
Download mbox | patch
Permalink /patch/257188/
State New
Headers show

Comments

Jakub Jelinek - July 5, 2013, 3:15 p.m.
Hi!

Kai has reported his type demotion patches lead to a regression, which can
be seen also without his patches by doing the type demotion by hand.
test1 is optimized using *jcc_bt<mode>_mask instruction (combiner detects
this), but test2 isn't.  In that case combiner first merges the and with
shift into *<shift_insn><mode>3_mask insn, and *jcc_bt<mode>_mask won't
match, because we end up with
(zero_extend:SI (subreg:QI (and:SI <something> (const_int 63)) 0))
and we don't simplify that.

So, my first approach was to simplify that: because nonzero_bits on the
subreg operand says that no bits outside of QImode may be non-zero,
both the zero_extend and the subreg can be dropped.  That is the simplify-rtx.c
change.  Then I've figured out that combine.c doesn't actually attempt to
simplify this anyway, so that is the combine.c change.  And lastly an i386
pattern was needed anyway.  I've also attempted to simplify:
(zero_extend:SI (subreg:QI (and:DI <something> (const_int 63)) 0))
into
(subreg:SI (and:DI <something> (const_int 63)) 0) (very small change in
simplify-rtx.c, just drop the requirement that zero_extend mode is as wide
or wider than SUBREG_REG's mode, and when it is <= use gen_lowpart_no_emit
instead of just returning the SUBREG_REG), but that unfortunately regressed
the test1 case; we'd need some further i386.md tweaks.
While in theory this folding looks like a useful simplification, because
of this regression I'm wondering whether other backends rely on such
expressions not being simplified.

So, as an alternative, I've also implemented an i386.md-only fix.

Thus, do we want the first patch, or first patch + also the
above described further simplify-rtx.c change + some further i386.md tweaks,
or just the second patch instead?

Both have been bootstrapped/regtested on x86_64-linux and i686-linux.

	Jakub
2013-07-05  Jakub Jelinek  <jakub@redhat.com>

	PR target/57819
	* simplify-rtx.c (simplify_unary_operation_1) <case ZERO_EXTEND>:
	Simplify (zero_extend:SI (subreg:QI (and:SI (reg:SI)
	(const_int 63)) 0)).
	* combine.c (make_extraction): Create ZERO_EXTEND or SIGN_EXTEND
	using simplify_gen_unary instead of gen_rtx_*_EXTEND.
	* config/i386/i386.md (*jcc_bt<mode>_1): New define_insn_and_split.

	* gcc.target/i386/pr57819.c: New test.
2013-07-05  Jakub Jelinek  <jakub@redhat.com>

	PR target/57819
	* config/i386/i386.md (*jcc_bt<mode>_mask_1): New
	define_insn_and_split.

	* gcc.target/i386/pr57819.c: New test.

--- gcc/config/i386/i386.md.jj	2013-06-27 18:47:32.000000000 +0200
+++ gcc/config/i386/i386.md	2013-07-04 16:54:48.789218553 +0200
@@ -10510,6 +10510,45 @@ (define_insn_and_split "*jcc_bt<mode>_ma
   PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
 })
 
+;; Like *jcc_bt<mode>_mask, but for the case where AND has been previously
+;; combined with a shift.
+(define_insn_and_split "*jcc_bt<mode>_mask_1"
+  [(set (pc)
+  	(if_then_else (match_operator 0 "bt_comparison_operator"
+			[(zero_extract:SWI48
+			   (match_operand:SWI48 1 "register_operand" "r")
+			   (const_int 1)
+			   (zero_extend:SI
+			     (subreg:QI
+			       (and:SI
+				 (match_operand:SI 2 "register_operand" "r")
+				 (match_operand:SI 3 "const_int_operand" "n"))
+			       0)))])
+		      (label_ref (match_operand 4))
+		      (pc)))
+   (clobber (reg:CC FLAGS_REG))]
+  "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
+   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+      == GET_MODE_BITSIZE (<MODE>mode)-1"
+  "#"
+  "&& 1"
+  [(set (reg:CCC FLAGS_REG)
+	(compare:CCC
+	  (zero_extract:SWI48
+	    (match_dup 1)
+	    (const_int 1)
+	    (match_dup 2))
+	  (const_int 0)))
+   (set (pc)
+	(if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
+		      (label_ref (match_dup 4))
+		      (pc)))]
+{
+  operands[2] = simplify_gen_subreg (<MODE>mode, operands[2], SImode, 0);
+
+  PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
+})
+
 (define_insn_and_split "*jcc_btsi_1"
   [(set (pc)
   	(if_then_else (match_operator 0 "bt_comparison_operator"
--- gcc/testsuite/gcc.target/i386/pr57819.c.jj	2013-07-04 16:27:46.900877301 +0200
+++ gcc/testsuite/gcc.target/i386/pr57819.c	2013-07-04 16:27:30.000000000 +0200
@@ -0,0 +1,38 @@
+/* PR target/57819 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=core2" } */
+
+void foo (void);
+
+__extension__ typedef __INTPTR_TYPE__ intptr_t;
+
+int
+test1 (intptr_t x, intptr_t n)
+{
+  n &= sizeof (intptr_t) * __CHAR_BIT__ - 1;
+
+  if (x & ((intptr_t) 1 << n))
+    foo ();
+
+  return 0;
+}
+
+int
+test2 (intptr_t x, intptr_t n)
+{
+  if (x & ((intptr_t) 1 << ((int) n & (sizeof (intptr_t) * __CHAR_BIT__ - 1))))
+    foo ();
+
+  return 0;
+}
+
+int
+test3 (intptr_t x, intptr_t n)
+{
+  if (x & ((intptr_t) 1 << ((int) n & ((int) sizeof (intptr_t) * __CHAR_BIT__ - 1))))
+    foo ();
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-not "and\[lq\]\[ \t\]" } } */
Eric Botcazou - July 8, 2013, 8:24 a.m.
> So, my first approach was trying to simplify that, because nonzero_bits
> on the subreg operand say that no bits outside of QImode may be non-zero,
> both the zero_extend and subreg can be dropped.  That is the simplify-rtx.c
> change.  Then I've figured out that combine.c doesn't actually attempt to
> simplify this anyway, so that is the combine.c change.  And lastly an i386
> pattern was needed anyway.  I've also attempted to simplify:
> (zero_extend:SI (subreg:QI (and:DI <something> (const_int 63)) 0))
> into
> (subreg:SI (and:DI <something> (const_int 63)) 0) (very small change in
> simplify-rtx.c, just drop the requirement that zero_extend mode is as wide
> or wider than SUBREG_REG's mode, and when it is <= use gen_lowpart_no_emit
> instead of just returning the SUBREG_REG, but that unfortunately regressed
> the test1 case, we'd need some further i386.md tweaks.
> While in theory this folding looks like a useful simplification, because
> of this I'm wondering if other backends don't rely on those actually not
> being simplified.
> 
> So, I've as an alternative implemented also an i386.md only fix.
> 
> Thus, do we want the first patch, or first patch + also the
> above described further simplify-rtx.c change + some further i386.md tweaks,
> or just the second patch instead?

Given the look of the pattern in the second patch, I think we definitely want 
to simplify upstream.  The first simplification looks very natural to me, the 
further tweaked one less so, so let's go for the first patch alone, with a 
small tweak:

+      /* (zero_extend:M (subreg:N <X:O>)) is <X:O> (for M == O) or
+	 (zero_extend:M <X:O>), if X doesn't have any bits outside of N mode
+	 non-zero.  E.g.

"if X doesn't have any non-zero bits outside of mode N."

Patch

--- gcc/simplify-rtx.c.jj	2013-06-01 14:47:23.000000000 +0200
+++ gcc/simplify-rtx.c	2013-07-04 16:24:48.654817120 +0200
@@ -1470,6 +1470,29 @@  simplify_unary_operation_1 (enum rtx_cod
 	    }
 	}
 
+      /* (zero_extend:M (subreg:N <X:O>)) is <X:O> (for M == O) or
+	 (zero_extend:M <X:O>), if X doesn't have any bits outside of N mode
+	 non-zero.  E.g.
+	 (zero_extend:SI (subreg:QI (and:SI (reg:SI) (const_int 63)) 0)) is
+	 (and:SI (reg:SI) (const_int 63)).  */
+      if (GET_CODE (op) == SUBREG
+	  && GET_MODE_PRECISION (GET_MODE (op))
+	     < GET_MODE_PRECISION (GET_MODE (SUBREG_REG (op)))
+	  && GET_MODE_PRECISION (GET_MODE (SUBREG_REG (op)))
+	     <= HOST_BITS_PER_WIDE_INT
+	  && GET_MODE_PRECISION (mode)
+	     >= GET_MODE_PRECISION (GET_MODE (SUBREG_REG (op)))
+	  && subreg_lowpart_p (op)
+	  && (nonzero_bits (SUBREG_REG (op), GET_MODE (SUBREG_REG (op)))
+	      & ~GET_MODE_MASK (GET_MODE (op))) == 0)
+	{
+	  if (GET_MODE_PRECISION (mode)
+	      == GET_MODE_PRECISION (GET_MODE (SUBREG_REG (op))))
+	    return SUBREG_REG (op);
+	  return simplify_gen_unary (ZERO_EXTEND, mode, SUBREG_REG (op),
+				     GET_MODE (SUBREG_REG (op)));
+	}
+
 #if defined(POINTERS_EXTEND_UNSIGNED) && !defined(HAVE_ptr_extend)
       /* As we do not know which address space the pointer is referring to,
 	 we can do this only if the target does not support different pointer
--- gcc/combine.c.jj	2013-05-04 14:40:40.000000000 +0200
+++ gcc/combine.c	2013-07-04 15:44:59.409575170 +0200
@@ -7326,7 +7326,8 @@  make_extraction (enum machine_mode mode,
   if (pos_rtx != 0
       && GET_MODE_SIZE (pos_mode) > GET_MODE_SIZE (GET_MODE (pos_rtx)))
     {
-      rtx temp = gen_rtx_ZERO_EXTEND (pos_mode, pos_rtx);
+      rtx temp = simplify_gen_unary (ZERO_EXTEND, pos_mode, pos_rtx,
+				     GET_MODE (pos_rtx));
 
       /* If we know that no extraneous bits are set, and that the high
 	 bit is not set, convert extraction to cheaper one - either
@@ -7340,7 +7341,8 @@  make_extraction (enum machine_mode mode,
 		       >> 1))
 		  == 0)))
 	{
-	  rtx temp1 = gen_rtx_SIGN_EXTEND (pos_mode, pos_rtx);
+	  rtx temp1 = simplify_gen_unary (SIGN_EXTEND, pos_mode, pos_rtx,
+					  GET_MODE (pos_rtx));
 
 	  /* Prefer ZERO_EXTENSION, since it gives more information to
 	     backends.  */
--- gcc/config/i386/i386.md.jj	2013-06-27 18:47:32.000000000 +0200
+++ gcc/config/i386/i386.md	2013-07-04 15:58:24.429243358 +0200
@@ -10474,6 +10474,39 @@  (define_insn_and_split "*jcc_bt<mode>"
   PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
 })
 
+;; Like *jcc_bt<mode>, but expect a SImode operand 2 instead of QImode
+;; zero extended to SImode.
+(define_insn_and_split "*jcc_bt<mode>_1"
+  [(set (pc)
+  	(if_then_else (match_operator 0 "bt_comparison_operator"
+			[(zero_extract:SWI48
+			   (match_operand:SWI48 1 "register_operand" "r")
+			   (const_int 1)
+			   (match_operand:SI 2 "register_operand" "r"))
+			 (const_int 0)])
+		      (label_ref (match_operand 3))
+		      (pc)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_BT || optimize_function_for_size_p (cfun)"
+  "#"
+  "&& 1"
+  [(set (reg:CCC FLAGS_REG)
+	(compare:CCC
+	  (zero_extract:SWI48
+	    (match_dup 1)
+	    (const_int 1)
+	    (match_dup 2))
+	  (const_int 0)))
+   (set (pc)
+	(if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
+		      (label_ref (match_dup 3))
+		      (pc)))]
+{
+  operands[2] = simplify_gen_subreg (<MODE>mode, operands[2], SImode, 0);
+
+  PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
+})
+
 ;; Avoid useless masking of bit offset operand.  "and" in SImode is correct
 ;; also for DImode, this is what combine produces.
 (define_insn_and_split "*jcc_bt<mode>_mask"
--- gcc/testsuite/gcc.target/i386/pr57819.c.jj	2013-07-04 16:27:46.900877301 +0200
+++ gcc/testsuite/gcc.target/i386/pr57819.c	2013-07-04 16:27:30.000000000 +0200
@@ -0,0 +1,38 @@ 
+/* PR target/57819 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=core2" } */
+
+void foo (void);
+
+__extension__ typedef __INTPTR_TYPE__ intptr_t;
+
+int
+test1 (intptr_t x, intptr_t n)
+{
+  n &= sizeof (intptr_t) * __CHAR_BIT__ - 1;
+
+  if (x & ((intptr_t) 1 << n))
+    foo ();
+
+  return 0;
+}
+
+int
+test2 (intptr_t x, intptr_t n)
+{
+  if (x & ((intptr_t) 1 << ((int) n & (sizeof (intptr_t) * __CHAR_BIT__ - 1))))
+    foo ();
+
+  return 0;
+}
+
+int
+test3 (intptr_t x, intptr_t n)
+{
+  if (x & ((intptr_t) 1 << ((int) n & ((int) sizeof (intptr_t) * __CHAR_BIT__ - 1))))
+    foo ();
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-not "and\[lq\]\[ \t\]" } } */