Patchwork Fix MULT_HIGHPART_EXPR folding (PR tree-optimization/56918)

login
register
mail settings
Submitter Jakub Jelinek
Date April 11, 2013, 6:15 p.m.
Message ID <20130411181519.GU16463@tucnak.redhat.com>
Download mbox | patch
Permalink /patch/235862/
State New
Headers show

Comments

Jakub Jelinek - April 11, 2013, 6:15 p.m.
Hi!

The op1 - op2 line instead of multiplication is something that has been
introduced in C++ double_int changes, but without the 2 * TYPE_PRECISION
prec it would always return 0 or -1 anyway.  And, with wide_mul_with_sign
we can also handle 2 * HOST_BITS_PER_WIDE_INT precisions.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk/4.8?

2013-04-11  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/56918
	PR tree-optimization/56920
	* fold-const.c (int_const_binop_1): Use op1.mul_with_sign (op2, ...)
	instead of op1 - op2.  Pass 2 * TYPE_PRECISION (type) as second
	argument to rshift method.  For 2 * HOST_BITS_PER_WIDE_INT precision
	use wide_mul_with_sign method.

	* gcc.dg/vect/pr56918.c: New test.
	* gcc.dg/vect/pr56920.c: New test.


	Jakub
Marc Glisse - April 11, 2013, 7:13 p.m.
On Thu, 11 Apr 2013, Jakub Jelinek wrote:

> The op1 - op2 line instead of multiplication is something that has been
> introduced in C++ double_int changes, but without the 2 * TYPE_PRECISION
> prec it would always return 0 or -1 anyway.

The comments on PREC in the rshift function really confused me.

> And, with wide_mul_with_sign
> we can also handle 2 * HOST_BITS_PER_WIDE_INT precisions.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk/4.8?

I have one question below.

> 2013-04-11  Jakub Jelinek  <jakub@redhat.com>
>
> 	PR tree-optimization/56918
> 	PR tree-optimization/56920
> 	* fold-const.c (int_const_binop_1): Use op1.mul_with_sign (op2, ...)
> 	instead of op1 - op2.  Pass 2 * TYPE_PRECISION (type) as second
> 	argument to rshift method.  For 2 * HOST_BITS_PER_WIDE_INT precision
> 	use wide_mul_with_sign method.
>
> 	* gcc.dg/vect/pr56918.c: New test.
> 	* gcc.dg/vect/pr56920.c: New test.
>
> --- gcc/fold-const.c.jj	2013-04-11 09:36:41.000000000 +0200
> +++ gcc/fold-const.c	2013-04-11 16:16:50.273545925 +0200
> @@ -984,12 +984,16 @@ int_const_binop_1 (enum tree_code code,
>       break;
>
>     case MULT_HIGHPART_EXPR:
> -      /* ??? Need quad precision, or an additional shift operand
> -	 to the multiply primitive, to handle very large highparts.  */
>       if (TYPE_PRECISION (type) > HOST_BITS_PER_WIDE_INT)
> -	return NULL_TREE;
> -      tmp = op1 - op2;
> -      res = tmp.rshift (TYPE_PRECISION (type), TYPE_PRECISION (type), !uns);
> +	{
> +	  if (TYPE_PRECISION (type) != 2 * HOST_BITS_PER_WIDE_INT)
> +	    return NULL_TREE;
> +	  op1.wide_mul_with_sign (op2, false, &res, &overflow);

Why "false" and not something involving "uns"?

> +	  break;
> +	}
> +      tmp = op1.mul_with_sign (op2, false, &overflow);
> +      res
> +	= tmp.rshift (TYPE_PRECISION (type), 2 * TYPE_PRECISION (type), !uns);
>       break;
>
>     case TRUNC_DIV_EXPR:
> --- gcc/testsuite/gcc.dg/vect/pr56918.c.jj	2013-04-11 16:26:36.269168125 +0200
> +++ gcc/testsuite/gcc.dg/vect/pr56918.c	2013-04-11 16:30:07.410945804 +0200
> @@ -0,0 +1,31 @@
> +/* PR tree-optimization/56918 */
> +/* { dg-additional-options "-O3" } */
> +
> +#include "tree-vect.h"
> +
> +extern void abort (void);
> +double data[8];
> +
> +__attribute__((noinline, noclone)) void
> +foo ()
> +{
> +  int i;
> +  for (i = 0; i < 8; ++i)
> +    data[i] = ((i + 2) % 3) + 1;
> +}
> +
> +int
> +main ()
> +{
> +  int i;
> +  check_vect ();
> +  foo ();
> +  if (data[0] != 3 || data[7] != 1)
> +    abort ();
> +  for (i = 1; i < 4; ++i)
> +    if (data[i] != i || data[i + 3] != i)
> +      abort ();
> +  return 0;
> +}
> +
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> --- gcc/testsuite/gcc.dg/vect/pr56920.c.jj	2013-04-11 17:16:48.570982675 +0200
> +++ gcc/testsuite/gcc.dg/vect/pr56920.c	2013-04-11 17:17:28.310757853 +0200
> @@ -0,0 +1,21 @@
> +/* PR tree-optimization/56920 */
> +/* { dg-additional-options "-O3" } */
> +
> +#include "tree-vect.h"
> +
> +extern void abort (void);
> +
> +int
> +main ()
> +{
> +  unsigned int a[15], i;
> +  check_vect ();
> +  for (i = 0; i < 15; ++i)
> +    a[i] = (i * 2) % 15;
> +  for (i = 0; i < 15; ++i)
> +    if (a[i] != (i * 2) % 15)
> +      abort ();
> +  return 0;
> +}
> +
> +/* { dg-final { cleanup-tree-dump "vect" } } */
>
> 	Jakub
>
Jakub Jelinek - April 12, 2013, 7:44 a.m.
On Thu, Apr 11, 2013 at 09:13:17PM +0200, Marc Glisse wrote:
> On Thu, 11 Apr 2013, Jakub Jelinek wrote:
> 
> >The op1 - op2 line instead of multiplication is something that has been
> >introduced in C++ double_int changes, but without the 2 * TYPE_PRECISION
> >prec it would always return 0 or -1 anyway.
> 
> The comments on PREC in the rshift function really confused me.

Yeah, me too.  But if count == prec, the code always:
  /* Zero / sign extend all bits that are beyond the precision.  */
   
  if (count >= prec)
    {
      *hv = signmask;
      *lv = signmask;
    }          
which is definitely not what we want.  We want TYPE_PRECISION bits
of the result zero resp. sign extended into the whole double_int precision,
which for TYPE_PRECISION == HOST_BITS_PER_WIDE_INT is exactly the:
  else if ((prec - count) >= HOST_BITS_PER_WIDE_INT)
    {
      *hv &= ~((HOST_WIDE_INT) (-1) << (prec - count - HOST_BITS_PER_WIDE_INT));
      *hv |= signmask << (prec - count - HOST_BITS_PER_WIDE_INT);
    }
case, and for smaller TYPE_PRECISION it is the last case:
  else
    {
      *hv = signmask;
      *lv &= ~((unsigned HOST_WIDE_INT) (-1) << (prec - count));
      *lv |= signmask << (prec - count);
    }

> I have one question below.
> >+	{
> >+	  if (TYPE_PRECISION (type) != 2 * HOST_BITS_PER_WIDE_INT)
> >+	    return NULL_TREE;
> >+	  op1.wide_mul_with_sign (op2, false, &res, &overflow);
> 
> Why "false" and not something involving "uns"?

false was what rth used in his original code.  Looking at what is the
difference, it seems unsigned_p only matters for the computation of
overflow, and, MULT_HIGHPART_EXPR should never overflow, thus perhaps
we should either have a dummy bool overflow_dummy; there and use it
instead of &overflow, or just set overflow = false; afterwards.
Preferences?

	Jakub
Richard Guenther - April 12, 2013, 7:47 a.m.
On Fri, 12 Apr 2013, Jakub Jelinek wrote:

> On Thu, Apr 11, 2013 at 09:13:17PM +0200, Marc Glisse wrote:
> > On Thu, 11 Apr 2013, Jakub Jelinek wrote:
> > 
> > >The op1 - op2 line instead of multiplication is something that has been
> > >introduced in C++ double_int changes, but without the 2 * TYPE_PRECISION
> > >prec it would always return 0 or -1 anyway.
> > 
> > The comments on PREC in the rshift function really confused me.
> 
> Yeah, me too.  But if count == prec, the code always:
>   /* Zero / sign extend all bits that are beyond the precision.  */
>    
>   if (count >= prec)
>     {
>       *hv = signmask;
>       *lv = signmask;
>     }          
> which is definitely not what we want.  We want TYPE_PRECISION bits
> of the result zero resp. sign extended into the whole double_int precision,
> which for TYPE_PRECISION == HOST_BITS_PER_WIDE_INT is exactly the:
>   else if ((prec - count) >= HOST_BITS_PER_WIDE_INT)
>     {
>       *hv &= ~((HOST_WIDE_INT) (-1) << (prec - count - HOST_BITS_PER_WIDE_INT));
>       *hv |= signmask << (prec - count - HOST_BITS_PER_WIDE_INT);
>     }
> case, and for smaller TYPE_PRECISION it is the last case:
>   else
>     {
>       *hv = signmask;
>       *lv &= ~((unsigned HOST_WIDE_INT) (-1) << (prec - count));
>       *lv |= signmask << (prec - count);
>     }
> 
> > I have one question below.
> > >+	{
> > >+	  if (TYPE_PRECISION (type) != 2 * HOST_BITS_PER_WIDE_INT)
> > >+	    return NULL_TREE;
> > >+	  op1.wide_mul_with_sign (op2, false, &res, &overflow);
> > 
> > Why "false" and not something involving "uns"?
> 
> false was what rth used in his original code.  Looking at what is the
> difference, it seems unsigned_p only matters for the computation of
> overflow, and, MULT_HIGHPART_EXPR should never overflow, thus perhaps
> we should either have a dummy bool overflow_dummy; there and use it
> instead of &overflow, or just set overflow = false; afterwards.
> Preferences?

dummy_overflow.

Ok with that change.

Thanks,
Richard.
Marc Glisse - April 12, 2013, 8:10 a.m.
On Fri, 12 Apr 2013, Jakub Jelinek wrote:

> On Thu, Apr 11, 2013 at 09:13:17PM +0200, Marc Glisse wrote:
>> The comments on PREC in the rshift function really confused me.
>
> Yeah, me too.  But if count == prec, the code always:
>  /* Zero / sign extend all bits that are beyond the precision.  */
>
>  if (count >= prec)
>    {
>      *hv = signmask;
>      *lv = signmask;
>    }
> which is definitely not what we want.

I agree. I meant something like: if someone knows how to improve that 
comment...

>> I have one question below.
>>> +	{
>>> +	  if (TYPE_PRECISION (type) != 2 * HOST_BITS_PER_WIDE_INT)
>>> +	    return NULL_TREE;
>>> +	  op1.wide_mul_with_sign (op2, false, &res, &overflow);
>>
>> Why "false" and not something involving "uns"?
>
> false was what rth used in his original code.  Looking at what is the
> difference, it seems unsigned_p only matters for the computation of
> overflow, and, MULT_HIGHPART_EXPR should never overflow, thus perhaps
> we should either have a dummy bool overflow_dummy; there and use it
> instead of &overflow, or just set overflow = false; afterwards.

unsigned_p changes the computation of the high part of the product: it 
returns early for unsigned products and adjusts the high part later for 
signed. overflow_dummy looks good. I don't remember exactly, but I think 
the overflow parameter is only there because the regular multiplication 
forwards to the wide multiplication.
Jakub Jelinek - April 12, 2013, 8:17 a.m.
On Fri, Apr 12, 2013 at 10:10:45AM +0200, Marc Glisse wrote:
> unsigned_p changes the computation of the high part of the product:
> it returns early for unsigned products and adjusts the high part
> later for signed. overflow_dummy looks good. I don't remember
> exactly, but I think the overflow parameter is only there because
> the regular multiplication forwards to the wide multiplication.

Ok, I've changed the wide_mul_with_sign method call to pass uns instead of
false before committing.

	Jakub

Patch

--- gcc/fold-const.c.jj	2013-04-11 09:36:41.000000000 +0200
+++ gcc/fold-const.c	2013-04-11 16:16:50.273545925 +0200
@@ -984,12 +984,16 @@  int_const_binop_1 (enum tree_code code,
       break;
 
     case MULT_HIGHPART_EXPR:
-      /* ??? Need quad precision, or an additional shift operand
-	 to the multiply primitive, to handle very large highparts.  */
       if (TYPE_PRECISION (type) > HOST_BITS_PER_WIDE_INT)
-	return NULL_TREE;
-      tmp = op1 - op2;
-      res = tmp.rshift (TYPE_PRECISION (type), TYPE_PRECISION (type), !uns);
+	{
+	  if (TYPE_PRECISION (type) != 2 * HOST_BITS_PER_WIDE_INT)
+	    return NULL_TREE;
+	  op1.wide_mul_with_sign (op2, false, &res, &overflow);
+	  break;
+	}
+      tmp = op1.mul_with_sign (op2, false, &overflow);
+      res
+	= tmp.rshift (TYPE_PRECISION (type), 2 * TYPE_PRECISION (type), !uns);
       break;
 
     case TRUNC_DIV_EXPR:
--- gcc/testsuite/gcc.dg/vect/pr56918.c.jj	2013-04-11 16:26:36.269168125 +0200
+++ gcc/testsuite/gcc.dg/vect/pr56918.c	2013-04-11 16:30:07.410945804 +0200
@@ -0,0 +1,31 @@ 
+/* PR tree-optimization/56918 */
+/* { dg-additional-options "-O3" } */
+
+#include "tree-vect.h"
+
+extern void abort (void);
+double data[8];
+
+__attribute__((noinline, noclone)) void
+foo ()
+{
+  int i;
+  for (i = 0; i < 8; ++i)
+    data[i] = ((i + 2) % 3) + 1;
+}
+
+int
+main ()
+{
+  int i;
+  check_vect ();
+  foo ();
+  if (data[0] != 3 || data[7] != 1)
+    abort ();
+  for (i = 1; i < 4; ++i)
+    if (data[i] != i || data[i + 3] != i)
+      abort ();
+  return 0;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- gcc/testsuite/gcc.dg/vect/pr56920.c.jj	2013-04-11 17:16:48.570982675 +0200
+++ gcc/testsuite/gcc.dg/vect/pr56920.c	2013-04-11 17:17:28.310757853 +0200
@@ -0,0 +1,21 @@ 
+/* PR tree-optimization/56920 */
+/* { dg-additional-options "-O3" } */
+
+#include "tree-vect.h"
+
+extern void abort (void);
+
+int
+main ()
+{
+  unsigned int a[15], i;
+  check_vect ();
+  for (i = 0; i < 15; ++i)
+    a[i] = (i * 2) % 15;
+  for (i = 0; i < 15; ++i)
+    if (a[i] != (i * 2) % 15)
+      abort ();
+  return 0;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */