Message ID | 20130411181519.GU16463@tucnak.redhat.com |
---|---|
State | New |
Headers | show |
On Thu, 11 Apr 2013, Jakub Jelinek wrote: > The op1 - op2 line instead of multiplication is something that has been > introduced in C++ double_int changes, but without the 2 * TYPE_PRECISION > prec it would always return 0 or -1 anyway. The comments on PREC in the rshift function really confused me. > And, with wide_mul_with_sign > we can also handle 2 * HOST_BITS_PER_WIDE_INT precisions. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk/4.8? I have one question below. > 2013-04-11 Jakub Jelinek <jakub@redhat.com> > > PR tree-optimization/56918 > PR tree-optimization/56920 > * fold-const.c (int_const_binop_1): Use op1.mul_with_sign (op2, ...) > instead of op1 - op2. Pass 2 * TYPE_PRECISION (type) as second > argument to rshift method. For 2 * HOST_BITS_PER_WIDE_INT precision > use wide_mul_with_sign method. > > * gcc.dg/vect/pr56918.c: New test. > * gcc.dg/vect/pr56920.c: New test. > > --- gcc/fold-const.c.jj 2013-04-11 09:36:41.000000000 +0200 > +++ gcc/fold-const.c 2013-04-11 16:16:50.273545925 +0200 > @@ -984,12 +984,16 @@ int_const_binop_1 (enum tree_code code, > break; > > case MULT_HIGHPART_EXPR: > - /* ??? Need quad precision, or an additional shift operand > - to the multiply primitive, to handle very large highparts. */ > if (TYPE_PRECISION (type) > HOST_BITS_PER_WIDE_INT) > - return NULL_TREE; > - tmp = op1 - op2; > - res = tmp.rshift (TYPE_PRECISION (type), TYPE_PRECISION (type), !uns); > + { > + if (TYPE_PRECISION (type) != 2 * HOST_BITS_PER_WIDE_INT) > + return NULL_TREE; > + op1.wide_mul_with_sign (op2, false, &res, &overflow); Why "false" and not something involving "uns"? > + break; > + } > + tmp = op1.mul_with_sign (op2, false, &overflow); > + res > + = tmp.rshift (TYPE_PRECISION (type), 2 * TYPE_PRECISION (type), !uns); > break; > > case TRUNC_DIV_EXPR: > --- gcc/testsuite/gcc.dg/vect/pr56918.c.jj 2013-04-11 16:26:36.269168125 +0200 > +++ gcc/testsuite/gcc.dg/vect/pr56918.c 2013-04-11 16:30:07.410945804 +0200 > @@ -0,0 +1,31 @@ > +/* PR tree-optimization/56918 */ > +/* { dg-additional-options "-O3" } */ > + > +#include "tree-vect.h" > + > +extern void abort (void); > +double data[8]; > + > +__attribute__((noinline, noclone)) void > +foo () > +{ > + int i; > + for (i = 0; i < 8; ++i) > + data[i] = ((i + 2) % 3) + 1; > +} > + > +int > +main () > +{ > + int i; > + check_vect (); > + foo (); > + if (data[0] != 3 || data[7] != 1) > + abort (); > + for (i = 1; i < 4; ++i) > + if (data[i] != i || data[i + 3] != i) > + abort (); > + return 0; > +} > + > +/* { dg-final { cleanup-tree-dump "vect" } } */ > --- gcc/testsuite/gcc.dg/vect/pr56920.c.jj 2013-04-11 17:16:48.570982675 +0200 > +++ gcc/testsuite/gcc.dg/vect/pr56920.c 2013-04-11 17:17:28.310757853 +0200 > @@ -0,0 +1,21 @@ > +/* PR tree-optimization/56920 */ > +/* { dg-additional-options "-O3" } */ > + > +#include "tree-vect.h" > + > +extern void abort (void); > + > +int > +main () > +{ > + unsigned int a[15], i; > + check_vect (); > + for (i = 0; i < 15; ++i) > + a[i] = (i * 2) % 15; > + for (i = 0; i < 15; ++i) > + if (a[i] != (i * 2) % 15) > + abort (); > + return 0; > +} > + > +/* { dg-final { cleanup-tree-dump "vect" } } */ > > Jakub >
On Thu, Apr 11, 2013 at 09:13:17PM +0200, Marc Glisse wrote: > On Thu, 11 Apr 2013, Jakub Jelinek wrote: > > >The op1 - op2 line instead of multiplication is something that has been > >introduced in C++ double_int changes, but without the 2 * TYPE_PRECISION > >prec it would always return 0 or -1 anyway. > > The comments on PREC in the rshift function really confused me. Yeah, me too. But if count == prec, the code always: /* Zero / sign extend all bits that are beyond the precision. */ if (count >= prec) { *hv = signmask; *lv = signmask; } which is definitely not what we want. We want TYPE_PRECISION bits of the result zero resp. sign extended into the whole double_int precision, which for TYPE_PRECISION == HOST_BITS_PER_WIDE_INT is exactly the: else if ((prec - count) >= HOST_BITS_PER_WIDE_INT) { *hv &= ~((HOST_WIDE_INT) (-1) << (prec - count - HOST_BITS_PER_WIDE_INT)); *hv |= signmask << (prec - count - HOST_BITS_PER_WIDE_INT); } case, and for smaller TYPE_PRECISION it is the last case: else { *hv = signmask; *lv &= ~((unsigned HOST_WIDE_INT) (-1) << (prec - count)); *lv |= signmask << (prec - count); } > I have one question below. > >+ { > >+ if (TYPE_PRECISION (type) != 2 * HOST_BITS_PER_WIDE_INT) > >+ return NULL_TREE; > >+ op1.wide_mul_with_sign (op2, false, &res, &overflow); > > Why "false" and not something involving "uns"? false was what rth used in his original code. Looking at what is the difference, it seems unsigned_p only matters for the computation of overflow, and, MULT_HIGHPART_EXPR should never overflow, thus perhaps we should either have a dummy bool overflow_dummy; there and use it instead of &overflow, or just set overflow = false; afterwards. Preferences? Jakub
On Fri, 12 Apr 2013, Jakub Jelinek wrote: > On Thu, Apr 11, 2013 at 09:13:17PM +0200, Marc Glisse wrote: > > On Thu, 11 Apr 2013, Jakub Jelinek wrote: > > > > >The op1 - op2 line instead of multiplication is something that has been > > >introduced in C++ double_int changes, but without the 2 * TYPE_PRECISION > > >prec it would always return 0 or -1 anyway. > > > > The comments on PREC in the rshift function really confused me. > > Yeah, me too. But if count == prec, the code always: > /* Zero / sign extend all bits that are beyond the precision. */ > > if (count >= prec) > { > *hv = signmask; > *lv = signmask; > } > which is definitely not what we want. We want TYPE_PRECISION bits > of the result zero resp. sign extended into the whole double_int precision, > which for TYPE_PRECISION == HOST_BITS_PER_WIDE_INT is exactly the: > else if ((prec - count) >= HOST_BITS_PER_WIDE_INT) > { > *hv &= ~((HOST_WIDE_INT) (-1) << (prec - count - HOST_BITS_PER_WIDE_INT)); > *hv |= signmask << (prec - count - HOST_BITS_PER_WIDE_INT); > } > case, and for smaller TYPE_PRECISION it is the last case: > else > { > *hv = signmask; > *lv &= ~((unsigned HOST_WIDE_INT) (-1) << (prec - count)); > *lv |= signmask << (prec - count); > } > > > I have one question below. > > >+ { > > >+ if (TYPE_PRECISION (type) != 2 * HOST_BITS_PER_WIDE_INT) > > >+ return NULL_TREE; > > >+ op1.wide_mul_with_sign (op2, false, &res, &overflow); > > > > Why "false" and not something involving "uns"? > > false was what rth used in his original code. Looking at what is the > difference, it seems unsigned_p only matters for the computation of > overflow, and, MULT_HIGHPART_EXPR should never overflow, thus perhaps > we should either have a dummy bool overflow_dummy; there and use it > instead of &overflow, or just set overflow = false; afterwards. > Preferences? dummy_overflow. Ok with that change. Thanks, Richard.
On Fri, 12 Apr 2013, Jakub Jelinek wrote: > On Thu, Apr 11, 2013 at 09:13:17PM +0200, Marc Glisse wrote: >> The comments on PREC in the rshift function really confused me. > > Yeah, me too. But if count == prec, the code always: > /* Zero / sign extend all bits that are beyond the precision. */ > > if (count >= prec) > { > *hv = signmask; > *lv = signmask; > } > which is definitely not what we want. I agree. I meant something like: if someone knows how to improve that comment... >> I have one question below. >>> + { >>> + if (TYPE_PRECISION (type) != 2 * HOST_BITS_PER_WIDE_INT) >>> + return NULL_TREE; >>> + op1.wide_mul_with_sign (op2, false, &res, &overflow); >> >> Why "false" and not something involving "uns"? > > false was what rth used in his original code. Looking at what is the > difference, it seems unsigned_p only matters for the computation of > overflow, and, MULT_HIGHPART_EXPR should never overflow, thus perhaps > we should either have a dummy bool overflow_dummy; there and use it > instead of &overflow, or just set overflow = false; afterwards. unsigned_p changes the computation of the high part of the product: it returns early for unsigned products and adjusts the high part later for signed. overflow_dummy looks good. I don't remember exactly, but I think the overflow parameter is only there because the regular multiplication forwards to the wide multiplication.
On Fri, Apr 12, 2013 at 10:10:45AM +0200, Marc Glisse wrote: > unsigned_p changes the computation of the high part of the product: > it returns early for unsigned products and adjusts the high part > later for signed. overflow_dummy looks good. I don't remember > exactly, but I think the overflow parameter is only there because > the regular multiplication forwards to the wide multiplication. Ok, I've changed the wide_mul_with_sign method call to pass uns instead of false before committing. Jakub
--- gcc/fold-const.c.jj 2013-04-11 09:36:41.000000000 +0200 +++ gcc/fold-const.c 2013-04-11 16:16:50.273545925 +0200 @@ -984,12 +984,16 @@ int_const_binop_1 (enum tree_code code, break; case MULT_HIGHPART_EXPR: - /* ??? Need quad precision, or an additional shift operand - to the multiply primitive, to handle very large highparts. */ if (TYPE_PRECISION (type) > HOST_BITS_PER_WIDE_INT) - return NULL_TREE; - tmp = op1 - op2; - res = tmp.rshift (TYPE_PRECISION (type), TYPE_PRECISION (type), !uns); + { + if (TYPE_PRECISION (type) != 2 * HOST_BITS_PER_WIDE_INT) + return NULL_TREE; + op1.wide_mul_with_sign (op2, false, &res, &overflow); + break; + } + tmp = op1.mul_with_sign (op2, false, &overflow); + res + = tmp.rshift (TYPE_PRECISION (type), 2 * TYPE_PRECISION (type), !uns); break; case TRUNC_DIV_EXPR: --- gcc/testsuite/gcc.dg/vect/pr56918.c.jj 2013-04-11 16:26:36.269168125 +0200 +++ gcc/testsuite/gcc.dg/vect/pr56918.c 2013-04-11 16:30:07.410945804 +0200 @@ -0,0 +1,31 @@ +/* PR tree-optimization/56918 */ +/* { dg-additional-options "-O3" } */ + +#include "tree-vect.h" + +extern void abort (void); +double data[8]; + +__attribute__((noinline, noclone)) void +foo () +{ + int i; + for (i = 0; i < 8; ++i) + data[i] = ((i + 2) % 3) + 1; +} + +int +main () +{ + int i; + check_vect (); + foo (); + if (data[0] != 3 || data[7] != 1) + abort (); + for (i = 1; i < 4; ++i) + if (data[i] != i || data[i + 3] != i) + abort (); + return 0; +} + +/* { dg-final { cleanup-tree-dump "vect" } } */ --- gcc/testsuite/gcc.dg/vect/pr56920.c.jj 2013-04-11 17:16:48.570982675 +0200 +++ gcc/testsuite/gcc.dg/vect/pr56920.c 2013-04-11 17:17:28.310757853 +0200 @@ -0,0 +1,21 @@ +/* PR tree-optimization/56920 */ +/* { dg-additional-options "-O3" } */ + +#include "tree-vect.h" + +extern void abort (void); + +int +main () +{ + unsigned int a[15], i; + check_vect (); + for (i = 0; i < 15; ++i) + a[i] = (i * 2) % 15; + for (i = 0; i < 15; ++i) + if (a[i] != (i * 2) % 15) + abort (); + return 0; +} + +/* { dg-final { cleanup-tree-dump "vect" } } */