Message ID | 4CB7289A.7080608@ispras.ru |
---|---|
State | New |
Headers | show |
The patch looks good. I'd like to have a generic test for this. You should be able to see the shift in the .optimized tree dump. r~
On Thu, Oct 14, 2010 at 07:58:18PM +0400, Dmitry Melnik wrote: > diff --git a/gcc/fold-const.c b/gcc/fold-const.c > index 8146920..de0f350 100644 > --- a/gcc/fold-const.c > +++ b/gcc/fold-const.c > @@ -11599,6 +11599,22 @@ fold_binary_loc (location_t loc, > return NULL_TREE; > > case TRUNC_DIV_EXPR: > + /* Optimize (X & (-A)) / A where A is a power of 2, > + to X >> log2(A) */ > + if (TREE_CODE (arg0) == BIT_AND_EXPR > + && !TYPE_UNSIGNED (type) > + && integer_pow2p (arg1) && tree_int_cst_sgn (arg1) > 0) > + { > + tree sum = fold_binary_loc (loc, PLUS_EXPR, TREE_TYPE (arg1), > + arg1, TREE_OPERAND (arg0, 1)); > + if (sum && integer_zerop (sum)) > + return fold_build2_loc (loc, RSHIFT_EXPR, type, > + TREE_OPERAND (arg0, 0), > + build_int_cst (NULL_TREE, > + exact_log2 (TREE_INT_CST_LOW (arg1)))); Consider long long x = foo (); x = (x & -0x200000000LL) / 0x200000000LL; The above code would optimize this as x = x >> -1; on 32-bit HWI targets. Also, I think checking that arg1 is actually INTEGER_CST would be desirable too (integer_pow2p can return true even for COMPLEX_CST and then tree_int_cst_sgn accesses something the COMPLEX_CST doesn't have). Seems you've probably copied it from elsewhere which has a similar bug, will file a PR and fix that: extern void abort (void); __attribute__((noinline)) unsigned long long foo (unsigned long long x, int n) { return x / (0x200000000ULL << n); } volatile unsigned long long l = 0x40000000000ULL; int main (void) { if (foo (l, 1) != 0x100 || foo (l, 3) != 0x40) abort (); return 0; } fails on i686-linux at -O2 if 32-bit HWI, succeeds with 64-bit HWI or -m64. Jakub
2010-10-13 Dmitry Melnik <dm@ispras.ru> gcc/ * fold-const.c (fold_binary_loc): New transformation. gcc/testsuite/ * gcc.target/arm/20101013-1.c: New test. diff --git a/gcc/fold-const.c b/gcc/fold-const.c index 8146920..de0f350 100644 --- a/gcc/fold-const.c +++ b/gcc/fold-const.c @@ -11599,6 +11599,22 @@ fold_binary_loc (location_t loc, return NULL_TREE; case TRUNC_DIV_EXPR: + /* Optimize (X & (-A)) / A where A is a power of 2, + to X >> log2(A) */ + if (TREE_CODE (arg0) == BIT_AND_EXPR + && !TYPE_UNSIGNED (type) + && integer_pow2p (arg1) && tree_int_cst_sgn (arg1) > 0) + { + tree sum = fold_binary_loc (loc, PLUS_EXPR, TREE_TYPE (arg1), + arg1, TREE_OPERAND (arg0, 1)); + if (sum && integer_zerop (sum)) + return fold_build2_loc (loc, RSHIFT_EXPR, type, + TREE_OPERAND (arg0, 0), + build_int_cst (NULL_TREE, + exact_log2 (TREE_INT_CST_LOW (arg1)))); + } + + /* Fall thru */ case FLOOR_DIV_EXPR: /* Simplify A / (B << N) where A and B are positive and B is a power of 2, to A >> (N + log2(B)). */ diff --git a/gcc/testsuite/gcc.target/arm/20101013-1.c b/gcc/testsuite/gcc.target/arm/20101013-1.c new file mode 100644 index 0000000..34b19d7 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/20101013-1.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +int foo(int a) +{ + int x = (a & (~15)) / 16; + return x; +} + +/* { dg-final { scan-assembler-not "cmp" } } */