Comments
Patch
@@ -11773,13 +11773,14 @@ simplify_comparison (enum rtx_code code,
/* If we have (compare (xshiftrt FOO N) (const_int C)) and
the low order N bits of FOO are known to be zero, we can do this
by comparing FOO with C shifted left N bits so long as no
- overflow occurs. */
+ overflow occurs. Even if the low order N bits of FOO aren't known
+ to be zero, if the comparison is >= or < we can use the same
+ optimization and for > or <= by setting all the low
+ order N bits in the comparison constant. */
if (CONST_INT_P (XEXP (op0, 1))
- && INTVAL (XEXP (op0, 1)) >= 0
+ && INTVAL (XEXP (op0, 1)) > 0
&& INTVAL (XEXP (op0, 1)) < HOST_BITS_PER_WIDE_INT
&& mode_width <= HOST_BITS_PER_WIDE_INT
- && (nonzero_bits (XEXP (op0, 0), mode)
- & (((HOST_WIDE_INT) 1 << INTVAL (XEXP (op0, 1))) - 1)) == 0
&& (((unsigned HOST_WIDE_INT) const_op
+ (GET_CODE (op0) != LSHIFTRT
? ((GET_MODE_MASK (mode) >> INTVAL (XEXP (op0, 1)) >> 1)
@@ -11787,15 +11788,34 @@ simplify_comparison (enum rtx_code code,
: 0))
<= GET_MODE_MASK (mode) >> INTVAL (XEXP (op0, 1))))
{
- /* If the shift was logical, then we must make the condition
- unsigned. */
- if (GET_CODE (op0) == LSHIFTRT)
- code = unsigned_condition (code);
-
- const_op <<= INTVAL (XEXP (op0, 1));
- op1 = GEN_INT (const_op);
- op0 = XEXP (op0, 0);
- continue;
+ if ((nonzero_bits (XEXP (op0, 0), mode)
+ & (((HOST_WIDE_INT) 1 << INTVAL (XEXP (op0, 1))) - 1)) == 0
+ || code == GE || code == GEU || code == LT || code == LTU)
+ {
+ /* If the shift was logical, then we must make the condition
+ unsigned. */
+ if (GET_CODE (op0) == LSHIFTRT)
+ code = unsigned_condition (code);
+
+ const_op <<= INTVAL (XEXP (op0, 1));
+ op1 = GEN_INT (const_op);
+ op0 = XEXP (op0, 0);
+ continue;
+ }
+ if (code == GT || code == GTU || code == LE || code == LEU)
+ {
+ /* If the shift was logical, then we must make the condition
+ unsigned. */
+ if (GET_CODE (op0) == LSHIFTRT)
+ code = unsigned_condition (code);
+
+ const_op <<= INTVAL (XEXP (op0, 1));
+ const_op
+ |= (((HOST_WIDE_INT) 1 << INTVAL (XEXP (op0, 1))) - 1);
+ op1 = GEN_INT (const_op);
+ op0 = XEXP (op0, 0);
+ continue;
+ }
}
/* If we are using this shift to extract just the sign bit, we
@@ -0,0 +1,22 @@
+/* PR tree-optimization/20517 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int f1 (int x)
+{
+ return (x >> 23) > 12;
+}
+int f2 (int x)
+{
+ return x > ((13 << 23) - 1);
+}
+int f3 (int x)
+{
+ return (x >> 23) >= 12;
+}
+int f4 (int x)
+{
+ return x >= (12 << 23);
+}
+
+/* { dg-final { scan-assembler-not "sarl" } } */
Hi! Currently combiner optimizes (X >> N) {==,!=,>,>=,<,<=} C for constant N and C, provided C << N doesn't overflow and X is known not to have any low N bits set, into X op (C << N). But for {>,>=,<,<=} the requirement that the low N bits of X are known to be zero is IMHO unnecessary. For (X >> N) >= C resp. (X >> N) < C it is equivalent to X >= (C << N) resp. X < (C << N) for arbitrary low bits in X, and for (X >> N) > C resp. (X >> N) <= C it is equivalent to X > ((C + 1) << N) - 1 resp. X <= ((C + 1) << N) - 1. The difference this patch makes is on i?86 for f1: - movl 4(%esp), %eax - sarl $23, %eax - cmpl $12, %eax + xorl %eax, %eax + cmpl $109051903, 4(%esp) setg %al - movzbl %al, %eax ret and on x86_64: - sarl $23, %edi xorl %eax, %eax - cmpl $12, %edi + cmpl $109051903, %edi setg %al ret (and for f3 similarly). Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2010-09-09 Jakub Jelinek <jakub@redhat.com> PR tree-optimization/20517 * combine.c (simplify_comparison): Optimize (X >> N) {>,>=,<,<+} C even if low N bits of X aren't known to be zero. * gcc.target/i386/pr20517.c: New test. Jakub