From patchwork Thu Sep 9 07:05:21 2010
ContentType: text/plain; charset="utf8"
MIMEVersion: 1.0
ContentTransferEncoding: 7bit
Subject: Optimize (X >> N) {>, >=, <,
<=} C in the combiner (PR treeoptimization/20517)
XPatchworkSubmitter: Jakub Jelinek
XPatchworkId: 64258
MessageId: <20100909070521.GT1269@tyanft4801.lab.bos.redhat.com>
To: gccpatches@gcc.gnu.org
Date: Thu, 9 Sep 2010 09:05:21 +0200
From: Jakub Jelinek
ListId:
Hi!
Currently combiner optimizes (X >> N) {==,!=,>,>=,<,<=} C
for constant N and C, provided C << N doesn't overflow and
X is known not to have any low N bits set, into
X op (C << N). But for {>,>=,<,<=} the requirement that the low N bits
of X are known to be zero is IMHO unnecessary.
For
(X >> N) >= C resp. (X >> N) < C
it is equivalent to
X >= (C << N) resp. X < (C << N)
for arbitrary low bits in X, and for
(X >> N) > C resp. (X >> N) <= C
it is equivalent to
X > ((C + 1) << N)  1 resp. X <= ((C + 1) << N)  1.
The difference this patch makes is on i?86 for f1:
 movl 4(%esp), %eax
 sarl $23, %eax
 cmpl $12, %eax
+ xorl %eax, %eax
+ cmpl $109051903, 4(%esp)
setg %al
 movzbl %al, %eax
ret
and on x86_64:
 sarl $23, %edi
xorl %eax, %eax
 cmpl $12, %edi
+ cmpl $109051903, %edi
setg %al
ret
(and for f3 similarly).
Bootstrapped/regtested on x86_64linux and i686linux, ok for trunk?
20100909 Jakub Jelinek
PR treeoptimization/20517
* combine.c (simplify_comparison): Optimize (X >> N) {>,>=,<,<+} C
even if low N bits of X aren't known to be zero.
* gcc.target/i386/pr20517.c: New test.
Jakub
 gcc/combine.c.jj 20100906 08:42:00.000000000 +0200
+++ gcc/combine.c 20100908 19:41:37.000000000 +0200
@@ 11773,13 +11773,14 @@ simplify_comparison (enum rtx_code code,
/* If we have (compare (xshiftrt FOO N) (const_int C)) and
the low order N bits of FOO are known to be zero, we can do this
by comparing FOO with C shifted left N bits so long as no
 overflow occurs. */
+ overflow occurs. Even if the low order N bits of FOO aren't known
+ to be zero, if the comparison is >= or < we can use the same
+ optimization and for > or <= by setting all the low
+ order N bits in the comparison constant. */
if (CONST_INT_P (XEXP (op0, 1))
 && INTVAL (XEXP (op0, 1)) >= 0
+ && INTVAL (XEXP (op0, 1)) > 0
&& INTVAL (XEXP (op0, 1)) < HOST_BITS_PER_WIDE_INT
&& mode_width <= HOST_BITS_PER_WIDE_INT
 && (nonzero_bits (XEXP (op0, 0), mode)
 & (((HOST_WIDE_INT) 1 << INTVAL (XEXP (op0, 1)))  1)) == 0
&& (((unsigned HOST_WIDE_INT) const_op
+ (GET_CODE (op0) != LSHIFTRT
? ((GET_MODE_MASK (mode) >> INTVAL (XEXP (op0, 1)) >> 1)
@@ 11787,15 +11788,34 @@ simplify_comparison (enum rtx_code code,
: 0))
<= GET_MODE_MASK (mode) >> INTVAL (XEXP (op0, 1))))
{
 /* If the shift was logical, then we must make the condition
 unsigned. */
 if (GET_CODE (op0) == LSHIFTRT)
 code = unsigned_condition (code);

 const_op <<= INTVAL (XEXP (op0, 1));
 op1 = GEN_INT (const_op);
 op0 = XEXP (op0, 0);
 continue;
+ if ((nonzero_bits (XEXP (op0, 0), mode)
+ & (((HOST_WIDE_INT) 1 << INTVAL (XEXP (op0, 1)))  1)) == 0
+  code == GE  code == GEU  code == LT  code == LTU)
+ {
+ /* If the shift was logical, then we must make the condition
+ unsigned. */
+ if (GET_CODE (op0) == LSHIFTRT)
+ code = unsigned_condition (code);
+
+ const_op <<= INTVAL (XEXP (op0, 1));
+ op1 = GEN_INT (const_op);
+ op0 = XEXP (op0, 0);
+ continue;
+ }
+ if (code == GT  code == GTU  code == LE  code == LEU)
+ {
+ /* If the shift was logical, then we must make the condition
+ unsigned. */
+ if (GET_CODE (op0) == LSHIFTRT)
+ code = unsigned_condition (code);
+
+ const_op <<= INTVAL (XEXP (op0, 1));
+ const_op
+ = (((HOST_WIDE_INT) 1 << INTVAL (XEXP (op0, 1)))  1);
+ op1 = GEN_INT (const_op);
+ op0 = XEXP (op0, 0);
+ continue;
+ }
}
/* If we are using this shift to extract just the sign bit, we
 gcc/testsuite/gcc.target/i386/pr20517.c.jj 20100908 20:06:34.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/pr20517.c 20100908 20:06:52.000000000 +0200
@@ 0,0 +1,22 @@
+/* PR treeoptimization/20517 */
+/* { dgdo compile } */
+/* { dgoptions "O2" } */
+
+int f1 (int x)
+{
+ return (x >> 23) > 12;
+}
+int f2 (int x)
+{
+ return x > ((13 << 23)  1);
+}
+int f3 (int x)
+{
+ return (x >> 23) >= 12;
+}
+int f4 (int x)
+{
+ return x >= (12 << 23);
+}
+
+/* { dgfinal { scanassemblernot "sarl" } } */