diff mbox

Fix PR44214

Message ID 1334887118.32653.6.camel@gnopaine
State New
Headers show

Commit Message

Bill Schmidt April 20, 2012, 1:58 a.m. UTC
This enhances constant folding for division by complex and vector
constants.  When -freciprocal-math is present, such divisions are
converted into multiplies by the constant reciprocal.  When an exact
reciprocal is available, this is done for vector constants when
optimizing.  I did not implement logic for exact reciprocals of complex
constants because either (a) the complexity doesn't justify the
likelihood of occurrence, or (b) I'm lazy.  Your choice. ;)

Bootstrapped with no new regressions on powerpc64-unknown-linux-gnu.  Ok
for trunk?

Thanks,
Bill


gcc:

2012-04-19  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	PR rtl-optimization/44214
	* fold-const.c (exact_inverse): New function.
	(fold_binary_loc): Fold vector and complex division by constant into
	multiply by recripocal with flag_reciprocal_math; fold vector division
	by constant into multiply by reciprocal with exact inverse.

gcc/testsuite:

2012-04-19  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	PR rtl-optimization/44214
	* gcc.target/powerpc/pr44214-1.c: New test.
	* gcc.dg/pr44214-2.c: Likewise.
	* gcc.target/powerpc/pr44214-3.c: Likewise.

Comments

Richard Biener April 20, 2012, 8:04 a.m. UTC | #1
On Thu, 19 Apr 2012, William J. Schmidt wrote:

> This enhances constant folding for division by complex and vector
> constants.  When -freciprocal-math is present, such divisions are
> converted into multiplies by the constant reciprocal.  When an exact
> reciprocal is available, this is done for vector constants when
> optimizing.  I did not implement logic for exact reciprocals of complex
> constants because either (a) the complexity doesn't justify the
> likelihood of occurrence, or (b) I'm lazy.  Your choice. ;)
> 
> Bootstrapped with no new regressions on powerpc64-unknown-linux-gnu.  Ok
> for trunk?

See below ...

> Thanks,
> Bill
> 
> 
> gcc:
> 
> 2012-04-19  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
> 
> 	PR rtl-optimization/44214
> 	* fold-const.c (exact_inverse): New function.
> 	(fold_binary_loc): Fold vector and complex division by constant into
> 	multiply by recripocal with flag_reciprocal_math; fold vector division
> 	by constant into multiply by reciprocal with exact inverse.
> 
> gcc/testsuite:
> 
> 2012-04-19  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
> 
> 	PR rtl-optimization/44214
> 	* gcc.target/powerpc/pr44214-1.c: New test.
> 	* gcc.dg/pr44214-2.c: Likewise.
> 	* gcc.target/powerpc/pr44214-3.c: Likewise.
> 
> 
> Index: gcc/fold-const.c
> ===================================================================
> --- gcc/fold-const.c	(revision 186573)
> +++ gcc/fold-const.c	(working copy)
> @@ -9693,6 +9693,48 @@ fold_addr_of_array_ref_difference (location_t loc,
>    return NULL_TREE;
>  }
>  
> +/* If the real or vector real constant CST of type TYPE has an exact
> +   inverse, return it, else return NULL.  */
> +
> +static tree
> +exact_inverse (tree type, tree cst)
> +{
> +  REAL_VALUE_TYPE r;
> +  tree unit_type, *elts;
> +  enum machine_mode mode;
> +  unsigned vec_nelts, i;
> +
> +  switch (TREE_CODE (cst))
> +    {
> +    case REAL_CST:
> +      r = TREE_REAL_CST (cst);
> +
> +      if (exact_real_inverse (TYPE_MODE (type), &r))
> +	return build_real (type, r);
> +
> +      return NULL_TREE;
> +
> +    case VECTOR_CST:
> +      vec_nelts = VECTOR_CST_NELTS (cst);
> +      elts = XALLOCAVEC (tree, vec_nelts);
> +      unit_type = TREE_TYPE (type);
> +      mode = TYPE_MODE (unit_type);
> +
> +      for (i = 0; i < vec_nelts; i++)
> +	{
> +	  r = TREE_REAL_CST (VECTOR_CST_ELT (cst, i));
> +	  if (!exact_real_inverse (mode, &r))
> +	    return NULL_TREE;
> +	  elts[i] = build_real (unit_type, r);
> +	}
> +
> +      return build_vector (type, elts);
> +
> +    default:
> +      return NULL_TREE;
> +    }
> +}
> +
>  /* Fold a binary expression of code CODE and type TYPE with operands
>     OP0 and OP1.  LOC is the location of the resulting expression.
>     Return the folded expression if folding is successful.  Otherwise,
> @@ -11734,23 +11776,25 @@ fold_binary_loc (location_t loc,
>  	 so only do this if -freciprocal-math.  We can actually
>  	 always safely do it if ARG1 is a power of two, but it's hard to
>  	 tell if it is or not in a portable manner.  */
> -      if (TREE_CODE (arg1) == REAL_CST)
> +      if (TREE_CODE (arg1) == REAL_CST
> +	  || (TREE_CODE (arg1) == COMPLEX_CST
> +	      && COMPLEX_FLOAT_TYPE_P (TREE_TYPE (arg1)))
> +	  || (TREE_CODE (arg1) == VECTOR_CST
> +	      && VECTOR_FLOAT_TYPE_P (TREE_TYPE (arg1))))
>  	{
>  	  if (flag_reciprocal_math
> -	      && 0 != (tem = const_binop (code, build_real (type, dconst1),
> +	      && 0 != (tem = fold_binary (code, type, build_one_cst (type),
>  					  arg1)))

Any reason for not using const_binop?

>  	    return fold_build2_loc (loc, MULT_EXPR, type, arg0, tem);
> -	  /* Find the reciprocal if optimizing and the result is exact.  */
> -	  if (optimize)
> +	  /* Find the reciprocal if optimizing and the result is exact.
> +	     TODO: Complex reciprocal not implemented.  */
> +	  if (optimize
> +	      && TREE_CODE (arg1) != COMPLEX_CST)

I know this is all pre-existing, but really the flag_reciprocal_math
case should be under if (optimize), too.  So, can you move this check
to the toplevel covering both cases?

The testcases should apply to generic vectors, too, and should scan
the .original dump (where folding first applied).  So they should
not be target specific (and they should use -freciprocal-math).

Ok with those changes.

Thanks,
Richard.

>  	    {
> -	      REAL_VALUE_TYPE r;
> -	      r = TREE_REAL_CST (arg1);
> -	      if (exact_real_inverse (TYPE_MODE(TREE_TYPE(arg0)), &r))
> -		{
> -		  tem = build_real (type, r);
> -		  return fold_build2_loc (loc, MULT_EXPR, type,
> -				      fold_convert_loc (loc, type, arg0), tem);
> -		}
> +	      tree inverse = exact_inverse (TREE_TYPE (arg0), arg1);
> +
> +	      if (inverse)
> +		return fold_build2_loc (loc, MULT_EXPR, type, arg0, inverse);
>  	    }
>  	}
>        /* Convert A/B/C to A/(B*C).  */
> Index: gcc/testsuite/gcc.target/powerpc/pr44214-3.c
> ===================================================================
> --- gcc/testsuite/gcc.target/powerpc/pr44214-3.c	(revision 0)
> +++ gcc/testsuite/gcc.target/powerpc/pr44214-3.c	(revision 0)
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mcpu=power7 -fdump-tree-optimized" } */
> +
> +void do_div (vector double *a, vector double *b)
> +{
> +  *a = *b / (vector double) { 2.0, 2.0 };
> +}
> +
> +/* Since 2.0 has an exact reciprocal, constant folding should multiply *b
> +   by the reciprocals of the vector elements.  As a result there should be
> +   one vector multiply and zero divides in the optimized code.  The string
> +   " * " occurs 3 times: one multiply and two indirect parameters.  */
> +
> +/* { dg-final { scan-tree-dump-times " \\\* " 3 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times " / " 0 "optimized" } } */
> +/* { dg-final { cleanup-tree-dump "optimized" } } */
> Index: gcc/testsuite/gcc.target/powerpc/pr44214-1.c
> ===================================================================
> --- gcc/testsuite/gcc.target/powerpc/pr44214-1.c	(revision 0)
> +++ gcc/testsuite/gcc.target/powerpc/pr44214-1.c	(revision 0)
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -ffast-math -mcpu=power7 -fdump-tree-optimized" } */
> +
> +void do_div (vector double *a, vector double *b)
> +{
> +  *a = *b / (vector double) { 2.0, 3.0 };
> +}
> +
> +/* Constant folding should multiply *b by the reciprocals of the
> +   vector elements.  As a result there should be one vector multiply
> +   and zero divides in the optimized code.  The string " * " occurs
> +   3 times: one multiply and two indirect parameters.  */
> +
> +/* { dg-final { scan-tree-dump-times " \\\* " 3 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times " / " 0 "optimized" } } */
> +/* { dg-final { cleanup-tree-dump "optimized" } } */
> Index: gcc/testsuite/gcc.dg/pr44214-2.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/pr44214-2.c	(revision 0)
> +++ gcc/testsuite/gcc.dg/pr44214-2.c	(revision 0)
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -ffast-math -fdump-tree-optimized" } */
> +
> +void do_div (_Complex double *a, _Complex double *b)
> +{
> +  *a = *b / (4.0 - 5.0fi);
> +}
> +
> +/* Constant folding should multiply *b by the reciprocal of 4-5i
> +   = 4/41 - (5/41)i.  As a result there should be 4 multiplies and
> +   zero divides in the optimized code.  The string " * " occurs 6
> +   times: 4 multiplies and 2 indirect parameters.  */
> +
> +/* { dg-final { scan-tree-dump-times " \\\* " 6 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times " / " 0 "optimized" } } */
> +/* { dg-final { cleanup-tree-dump "optimized" } } */
Bill Schmidt April 20, 2012, 10:10 a.m. UTC | #2
On Fri, 2012-04-20 at 10:04 +0200, Richard Guenther wrote:
> On Thu, 19 Apr 2012, William J. Schmidt wrote:
> 
> > This enhances constant folding for division by complex and vector
> > constants.  When -freciprocal-math is present, such divisions are
> > converted into multiplies by the constant reciprocal.  When an exact
> > reciprocal is available, this is done for vector constants when
> > optimizing.  I did not implement logic for exact reciprocals of complex
> > constants because either (a) the complexity doesn't justify the
> > likelihood of occurrence, or (b) I'm lazy.  Your choice. ;)
> > 
> > Bootstrapped with no new regressions on powerpc64-unknown-linux-gnu.  Ok
> > for trunk?
> 
> See below ...
> 
> > Thanks,
> > Bill
> > 
> > 
> > gcc:
> > 
> > 2012-04-19  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
> > 
> > 	PR rtl-optimization/44214
> > 	* fold-const.c (exact_inverse): New function.
> > 	(fold_binary_loc): Fold vector and complex division by constant into
> > 	multiply by recripocal with flag_reciprocal_math; fold vector division
> > 	by constant into multiply by reciprocal with exact inverse.
> > 
> > gcc/testsuite:
> > 
> > 2012-04-19  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
> > 
> > 	PR rtl-optimization/44214
> > 	* gcc.target/powerpc/pr44214-1.c: New test.
> > 	* gcc.dg/pr44214-2.c: Likewise.
> > 	* gcc.target/powerpc/pr44214-3.c: Likewise.
> > 
> > 
> > Index: gcc/fold-const.c
> > ===================================================================
> > --- gcc/fold-const.c	(revision 186573)
> > +++ gcc/fold-const.c	(working copy)
> > @@ -9693,6 +9693,48 @@ fold_addr_of_array_ref_difference (location_t loc,
> >    return NULL_TREE;
> >  }
> >  
> > +/* If the real or vector real constant CST of type TYPE has an exact
> > +   inverse, return it, else return NULL.  */
> > +
> > +static tree
> > +exact_inverse (tree type, tree cst)
> > +{
> > +  REAL_VALUE_TYPE r;
> > +  tree unit_type, *elts;
> > +  enum machine_mode mode;
> > +  unsigned vec_nelts, i;
> > +
> > +  switch (TREE_CODE (cst))
> > +    {
> > +    case REAL_CST:
> > +      r = TREE_REAL_CST (cst);
> > +
> > +      if (exact_real_inverse (TYPE_MODE (type), &r))
> > +	return build_real (type, r);
> > +
> > +      return NULL_TREE;
> > +
> > +    case VECTOR_CST:
> > +      vec_nelts = VECTOR_CST_NELTS (cst);
> > +      elts = XALLOCAVEC (tree, vec_nelts);
> > +      unit_type = TREE_TYPE (type);
> > +      mode = TYPE_MODE (unit_type);
> > +
> > +      for (i = 0; i < vec_nelts; i++)
> > +	{
> > +	  r = TREE_REAL_CST (VECTOR_CST_ELT (cst, i));
> > +	  if (!exact_real_inverse (mode, &r))
> > +	    return NULL_TREE;
> > +	  elts[i] = build_real (unit_type, r);
> > +	}
> > +
> > +      return build_vector (type, elts);
> > +
> > +    default:
> > +      return NULL_TREE;
> > +    }
> > +}
> > +
> >  /* Fold a binary expression of code CODE and type TYPE with operands
> >     OP0 and OP1.  LOC is the location of the resulting expression.
> >     Return the folded expression if folding is successful.  Otherwise,
> > @@ -11734,23 +11776,25 @@ fold_binary_loc (location_t loc,
> >  	 so only do this if -freciprocal-math.  We can actually
> >  	 always safely do it if ARG1 is a power of two, but it's hard to
> >  	 tell if it is or not in a portable manner.  */
> > -      if (TREE_CODE (arg1) == REAL_CST)
> > +      if (TREE_CODE (arg1) == REAL_CST
> > +	  || (TREE_CODE (arg1) == COMPLEX_CST
> > +	      && COMPLEX_FLOAT_TYPE_P (TREE_TYPE (arg1)))
> > +	  || (TREE_CODE (arg1) == VECTOR_CST
> > +	      && VECTOR_FLOAT_TYPE_P (TREE_TYPE (arg1))))
> >  	{
> >  	  if (flag_reciprocal_math
> > -	      && 0 != (tem = const_binop (code, build_real (type, dconst1),
> > +	      && 0 != (tem = fold_binary (code, type, build_one_cst (type),
> >  					  arg1)))
> 
> Any reason for not using const_binop?

As it turns out, no.  I (blindly) made this change based on your comment
in the PR...

"The fold code should probably simply use fold_binary to do the constant
folding (which already should handle 1/x for x vector and complex. There
is a build_one_cst to build the constant 1 for any type)."

...but now that I've looked at it that was unnecessary, so I must have
misinterpreted this.  I'll revert to using const_binop.

> 
> >  	    return fold_build2_loc (loc, MULT_EXPR, type, arg0, tem);
> > -	  /* Find the reciprocal if optimizing and the result is exact.  */
> > -	  if (optimize)
> > +	  /* Find the reciprocal if optimizing and the result is exact.
> > +	     TODO: Complex reciprocal not implemented.  */
> > +	  if (optimize
> > +	      && TREE_CODE (arg1) != COMPLEX_CST)
> 
> I know this is all pre-existing, but really the flag_reciprocal_math
> case should be under if (optimize), too.  So, can you move this check
> to the toplevel covering both cases?

Sure.

> 
> The testcases should apply to generic vectors, too, and should scan
> the .original dump (where folding first applied).  So they should
> not be target specific (and they should use -freciprocal-math).

OK.  I was ignorant of the generic vector syntax using __attribute__.
If I change pr44214-1.c to use:

--
typedef double v2sd __attribute__ ((vector_size (16)));

void do_div (v2sd *a, v2sd *b)
{
  v2sd c = { 2.0, 3.0 };
  *a = *b / c;
}
--

it compiles fine, but the .original dump doesn't show the fold:

--
;; Function do_div (null)
;; enabled by -tree-original


{
  v2sd c = { 2.0e+0, 3.0e+0 };

    v2sd c = { 2.0e+0, 3.0e+0 };
  *a = *b / c;
}
Richard Biener April 20, 2012, 10:19 a.m. UTC | #3
On Fri, 20 Apr 2012, William J. Schmidt wrote:

> On Fri, 2012-04-20 at 10:04 +0200, Richard Guenther wrote:
> > On Thu, 19 Apr 2012, William J. Schmidt wrote:
> > 
> > > This enhances constant folding for division by complex and vector
> > > constants.  When -freciprocal-math is present, such divisions are
> > > converted into multiplies by the constant reciprocal.  When an exact
> > > reciprocal is available, this is done for vector constants when
> > > optimizing.  I did not implement logic for exact reciprocals of complex
> > > constants because either (a) the complexity doesn't justify the
> > > likelihood of occurrence, or (b) I'm lazy.  Your choice. ;)
> > > 
> > > Bootstrapped with no new regressions on powerpc64-unknown-linux-gnu.  Ok
> > > for trunk?
> > 
> > See below ...
> > 
> > > Thanks,
> > > Bill
> > > 
> > > 
> > > gcc:
> > > 
> > > 2012-04-19  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
> > > 
> > > 	PR rtl-optimization/44214
> > > 	* fold-const.c (exact_inverse): New function.
> > > 	(fold_binary_loc): Fold vector and complex division by constant into
> > > 	multiply by recripocal with flag_reciprocal_math; fold vector division
> > > 	by constant into multiply by reciprocal with exact inverse.
> > > 
> > > gcc/testsuite:
> > > 
> > > 2012-04-19  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
> > > 
> > > 	PR rtl-optimization/44214
> > > 	* gcc.target/powerpc/pr44214-1.c: New test.
> > > 	* gcc.dg/pr44214-2.c: Likewise.
> > > 	* gcc.target/powerpc/pr44214-3.c: Likewise.
> > > 
> > > 
> > > Index: gcc/fold-const.c
> > > ===================================================================
> > > --- gcc/fold-const.c	(revision 186573)
> > > +++ gcc/fold-const.c	(working copy)
> > > @@ -9693,6 +9693,48 @@ fold_addr_of_array_ref_difference (location_t loc,
> > >    return NULL_TREE;
> > >  }
> > >  
> > > +/* If the real or vector real constant CST of type TYPE has an exact
> > > +   inverse, return it, else return NULL.  */
> > > +
> > > +static tree
> > > +exact_inverse (tree type, tree cst)
> > > +{
> > > +  REAL_VALUE_TYPE r;
> > > +  tree unit_type, *elts;
> > > +  enum machine_mode mode;
> > > +  unsigned vec_nelts, i;
> > > +
> > > +  switch (TREE_CODE (cst))
> > > +    {
> > > +    case REAL_CST:
> > > +      r = TREE_REAL_CST (cst);
> > > +
> > > +      if (exact_real_inverse (TYPE_MODE (type), &r))
> > > +	return build_real (type, r);
> > > +
> > > +      return NULL_TREE;
> > > +
> > > +    case VECTOR_CST:
> > > +      vec_nelts = VECTOR_CST_NELTS (cst);
> > > +      elts = XALLOCAVEC (tree, vec_nelts);
> > > +      unit_type = TREE_TYPE (type);
> > > +      mode = TYPE_MODE (unit_type);
> > > +
> > > +      for (i = 0; i < vec_nelts; i++)
> > > +	{
> > > +	  r = TREE_REAL_CST (VECTOR_CST_ELT (cst, i));
> > > +	  if (!exact_real_inverse (mode, &r))
> > > +	    return NULL_TREE;
> > > +	  elts[i] = build_real (unit_type, r);
> > > +	}
> > > +
> > > +      return build_vector (type, elts);
> > > +
> > > +    default:
> > > +      return NULL_TREE;
> > > +    }
> > > +}
> > > +
> > >  /* Fold a binary expression of code CODE and type TYPE with operands
> > >     OP0 and OP1.  LOC is the location of the resulting expression.
> > >     Return the folded expression if folding is successful.  Otherwise,
> > > @@ -11734,23 +11776,25 @@ fold_binary_loc (location_t loc,
> > >  	 so only do this if -freciprocal-math.  We can actually
> > >  	 always safely do it if ARG1 is a power of two, but it's hard to
> > >  	 tell if it is or not in a portable manner.  */
> > > -      if (TREE_CODE (arg1) == REAL_CST)
> > > +      if (TREE_CODE (arg1) == REAL_CST
> > > +	  || (TREE_CODE (arg1) == COMPLEX_CST
> > > +	      && COMPLEX_FLOAT_TYPE_P (TREE_TYPE (arg1)))
> > > +	  || (TREE_CODE (arg1) == VECTOR_CST
> > > +	      && VECTOR_FLOAT_TYPE_P (TREE_TYPE (arg1))))
> > >  	{
> > >  	  if (flag_reciprocal_math
> > > -	      && 0 != (tem = const_binop (code, build_real (type, dconst1),
> > > +	      && 0 != (tem = fold_binary (code, type, build_one_cst (type),
> > >  					  arg1)))
> > 
> > Any reason for not using const_binop?
> 
> As it turns out, no.  I (blindly) made this change based on your comment
> in the PR...
> 
> "The fold code should probably simply use fold_binary to do the constant
> folding (which already should handle 1/x for x vector and complex. There
> is a build_one_cst to build the constant 1 for any type)."
> 
> ...but now that I've looked at it that was unnecessary, so I must have
> misinterpreted this.  I'll revert to using const_binop.
> 
> > 
> > >  	    return fold_build2_loc (loc, MULT_EXPR, type, arg0, tem);
> > > -	  /* Find the reciprocal if optimizing and the result is exact.  */
> > > -	  if (optimize)
> > > +	  /* Find the reciprocal if optimizing and the result is exact.
> > > +	     TODO: Complex reciprocal not implemented.  */
> > > +	  if (optimize
> > > +	      && TREE_CODE (arg1) != COMPLEX_CST)
> > 
> > I know this is all pre-existing, but really the flag_reciprocal_math
> > case should be under if (optimize), too.  So, can you move this check
> > to the toplevel covering both cases?
> 
> Sure.
> 
> > 
> > The testcases should apply to generic vectors, too, and should scan
> > the .original dump (where folding first applied).  So they should
> > not be target specific (and they should use -freciprocal-math).
> 
> OK.  I was ignorant of the generic vector syntax using __attribute__.
> If I change pr44214-1.c to use:
> 
> --
> typedef double v2sd __attribute__ ((vector_size (16)));
> 
> void do_div (v2sd *a, v2sd *b)
> {
>   v2sd c = { 2.0, 3.0 };
>   *a = *b / c;
> }
> --
> 
> it compiles fine, but the .original dump doesn't show the fold:
> 
> --
> ;; Function do_div (null)
> ;; enabled by -tree-original
> 
> 
> {
>   v2sd c = { 2.0e+0, 3.0e+0 };
> 
>     v2sd c = { 2.0e+0, 3.0e+0 };
>   *a = *b / c;
> }

That's because you have a separate stmt for initializing the constant.

 *a = *b / (v2sd) { 2.0, 3.0 };

should work
H.J. Lu April 20, 2012, 6:32 p.m. UTC | #4
On Thu, Apr 19, 2012 at 6:58 PM, William J. Schmidt
<wschmidt@linux.vnet.ibm.com> wrote:
> This enhances constant folding for division by complex and vector
> constants.  When -freciprocal-math is present, such divisions are
> converted into multiplies by the constant reciprocal.  When an exact
> reciprocal is available, this is done for vector constants when
> optimizing.  I did not implement logic for exact reciprocals of complex
> constants because either (a) the complexity doesn't justify the
> likelihood of occurrence, or (b) I'm lazy.  Your choice. ;)
>
> Bootstrapped with no new regressions on powerpc64-unknown-linux-gnu.  Ok
> for trunk?
>
> Thanks,
> Bill
>
>
> gcc:
>
> 2012-04-19  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
>
>        PR rtl-optimization/44214
>        * fold-const.c (exact_inverse): New function.
>        (fold_binary_loc): Fold vector and complex division by constant into
>        multiply by recripocal with flag_reciprocal_math; fold vector division
>        by constant into multiply by reciprocal with exact inverse.
>
> gcc/testsuite:
>

It caused:

FAIL: gcc.dg/torture/builtin-explog-1.c  -O0  (test for excess errors)
FAIL: gcc.dg/torture/builtin-power-1.c  -O0  (test for excess errors)

on x86.
Bill Schmidt April 20, 2012, 8:07 p.m. UTC | #5
On Fri, 2012-04-20 at 11:32 -0700, H.J. Lu wrote:
> On Thu, Apr 19, 2012 at 6:58 PM, William J. Schmidt
> <wschmidt@linux.vnet.ibm.com> wrote:
> > This enhances constant folding for division by complex and vector
> > constants.  When -freciprocal-math is present, such divisions are
> > converted into multiplies by the constant reciprocal.  When an exact
> > reciprocal is available, this is done for vector constants when
> > optimizing.  I did not implement logic for exact reciprocals of complex
> > constants because either (a) the complexity doesn't justify the
> > likelihood of occurrence, or (b) I'm lazy.  Your choice. ;)
> >
> > Bootstrapped with no new regressions on powerpc64-unknown-linux-gnu.  Ok
> > for trunk?
> >
> > Thanks,
> > Bill
> >
> >
> > gcc:
> >
> > 2012-04-19  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
> >
> >        PR rtl-optimization/44214
> >        * fold-const.c (exact_inverse): New function.
> >        (fold_binary_loc): Fold vector and complex division by constant into
> >        multiply by recripocal with flag_reciprocal_math; fold vector division
> >        by constant into multiply by reciprocal with exact inverse.
> >
> > gcc/testsuite:
> >
> 
> It caused:
> 
> FAIL: gcc.dg/torture/builtin-explog-1.c  -O0  (test for excess errors)
> FAIL: gcc.dg/torture/builtin-power-1.c  -O0  (test for excess errors)
> 
> on x86.
> 

Hm, sorry, I don't know how that escaped my testing.  This was due to
the suggestion to have the "optimize" test encompass the
-freciprocal-math test.  Looks like this changes some expected behavior,
at least for these two tests.  

Two options: Revert the move of the "optimize" test, or change the tests
to require -O1 or above.  Richard, what's your preference?

Thanks,
Bill
Richard Biener April 23, 2012, 9:09 a.m. UTC | #6
On Fri, 20 Apr 2012, William J. Schmidt wrote:

> On Fri, 2012-04-20 at 11:32 -0700, H.J. Lu wrote:
> > On Thu, Apr 19, 2012 at 6:58 PM, William J. Schmidt
> > <wschmidt@linux.vnet.ibm.com> wrote:
> > > This enhances constant folding for division by complex and vector
> > > constants.  When -freciprocal-math is present, such divisions are
> > > converted into multiplies by the constant reciprocal.  When an exact
> > > reciprocal is available, this is done for vector constants when
> > > optimizing.  I did not implement logic for exact reciprocals of complex
> > > constants because either (a) the complexity doesn't justify the
> > > likelihood of occurrence, or (b) I'm lazy.  Your choice. ;)
> > >
> > > Bootstrapped with no new regressions on powerpc64-unknown-linux-gnu.  Ok
> > > for trunk?
> > >
> > > Thanks,
> > > Bill
> > >
> > >
> > > gcc:
> > >
> > > 2012-04-19  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
> > >
> > >        PR rtl-optimization/44214
> > >        * fold-const.c (exact_inverse): New function.
> > >        (fold_binary_loc): Fold vector and complex division by constant into
> > >        multiply by recripocal with flag_reciprocal_math; fold vector division
> > >        by constant into multiply by reciprocal with exact inverse.
> > >
> > > gcc/testsuite:
> > >
> > 
> > It caused:
> > 
> > FAIL: gcc.dg/torture/builtin-explog-1.c  -O0  (test for excess errors)
> > FAIL: gcc.dg/torture/builtin-power-1.c  -O0  (test for excess errors)
> > 
> > on x86.
> > 
> 
> Hm, sorry, I don't know how that escaped my testing.  This was due to
> the suggestion to have the "optimize" test encompass the
> -freciprocal-math test.  Looks like this changes some expected behavior,
> at least for these two tests.  
> 
> Two options: Revert the move of the "optimize" test, or change the tests
> to require -O1 or above.  Richard, what's your preference?

Change the test to require -O1 or above.

Richard.
diff mbox

Patch

Index: gcc/fold-const.c
===================================================================
--- gcc/fold-const.c	(revision 186573)
+++ gcc/fold-const.c	(working copy)
@@ -9693,6 +9693,48 @@  fold_addr_of_array_ref_difference (location_t loc,
   return NULL_TREE;
 }
 
+/* If the real or vector real constant CST of type TYPE has an exact
+   inverse, return it, else return NULL.  */
+
+static tree
+exact_inverse (tree type, tree cst)
+{
+  REAL_VALUE_TYPE r;
+  tree unit_type, *elts;
+  enum machine_mode mode;
+  unsigned vec_nelts, i;
+
+  switch (TREE_CODE (cst))
+    {
+    case REAL_CST:
+      r = TREE_REAL_CST (cst);
+
+      if (exact_real_inverse (TYPE_MODE (type), &r))
+	return build_real (type, r);
+
+      return NULL_TREE;
+
+    case VECTOR_CST:
+      vec_nelts = VECTOR_CST_NELTS (cst);
+      elts = XALLOCAVEC (tree, vec_nelts);
+      unit_type = TREE_TYPE (type);
+      mode = TYPE_MODE (unit_type);
+
+      for (i = 0; i < vec_nelts; i++)
+	{
+	  r = TREE_REAL_CST (VECTOR_CST_ELT (cst, i));
+	  if (!exact_real_inverse (mode, &r))
+	    return NULL_TREE;
+	  elts[i] = build_real (unit_type, r);
+	}
+
+      return build_vector (type, elts);
+
+    default:
+      return NULL_TREE;
+    }
+}
+
 /* Fold a binary expression of code CODE and type TYPE with operands
    OP0 and OP1.  LOC is the location of the resulting expression.
    Return the folded expression if folding is successful.  Otherwise,
@@ -11734,23 +11776,25 @@  fold_binary_loc (location_t loc,
 	 so only do this if -freciprocal-math.  We can actually
 	 always safely do it if ARG1 is a power of two, but it's hard to
 	 tell if it is or not in a portable manner.  */
-      if (TREE_CODE (arg1) == REAL_CST)
+      if (TREE_CODE (arg1) == REAL_CST
+	  || (TREE_CODE (arg1) == COMPLEX_CST
+	      && COMPLEX_FLOAT_TYPE_P (TREE_TYPE (arg1)))
+	  || (TREE_CODE (arg1) == VECTOR_CST
+	      && VECTOR_FLOAT_TYPE_P (TREE_TYPE (arg1))))
 	{
 	  if (flag_reciprocal_math
-	      && 0 != (tem = const_binop (code, build_real (type, dconst1),
+	      && 0 != (tem = fold_binary (code, type, build_one_cst (type),
 					  arg1)))
 	    return fold_build2_loc (loc, MULT_EXPR, type, arg0, tem);
-	  /* Find the reciprocal if optimizing and the result is exact.  */
-	  if (optimize)
+	  /* Find the reciprocal if optimizing and the result is exact.
+	     TODO: Complex reciprocal not implemented.  */
+	  if (optimize
+	      && TREE_CODE (arg1) != COMPLEX_CST)
 	    {
-	      REAL_VALUE_TYPE r;
-	      r = TREE_REAL_CST (arg1);
-	      if (exact_real_inverse (TYPE_MODE(TREE_TYPE(arg0)), &r))
-		{
-		  tem = build_real (type, r);
-		  return fold_build2_loc (loc, MULT_EXPR, type,
-				      fold_convert_loc (loc, type, arg0), tem);
-		}
+	      tree inverse = exact_inverse (TREE_TYPE (arg0), arg1);
+
+	      if (inverse)
+		return fold_build2_loc (loc, MULT_EXPR, type, arg0, inverse);
 	    }
 	}
       /* Convert A/B/C to A/(B*C).  */
Index: gcc/testsuite/gcc.target/powerpc/pr44214-3.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/pr44214-3.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/pr44214-3.c	(revision 0)
@@ -0,0 +1,16 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=power7 -fdump-tree-optimized" } */
+
+void do_div (vector double *a, vector double *b)
+{
+  *a = *b / (vector double) { 2.0, 2.0 };
+}
+
+/* Since 2.0 has an exact reciprocal, constant folding should multiply *b
+   by the reciprocals of the vector elements.  As a result there should be
+   one vector multiply and zero divides in the optimized code.  The string
+   " * " occurs 3 times: one multiply and two indirect parameters.  */
+
+/* { dg-final { scan-tree-dump-times " \\\* " 3 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " / " 0 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
Index: gcc/testsuite/gcc.target/powerpc/pr44214-1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/pr44214-1.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/pr44214-1.c	(revision 0)
@@ -0,0 +1,16 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -mcpu=power7 -fdump-tree-optimized" } */
+
+void do_div (vector double *a, vector double *b)
+{
+  *a = *b / (vector double) { 2.0, 3.0 };
+}
+
+/* Constant folding should multiply *b by the reciprocals of the
+   vector elements.  As a result there should be one vector multiply
+   and zero divides in the optimized code.  The string " * " occurs
+   3 times: one multiply and two indirect parameters.  */
+
+/* { dg-final { scan-tree-dump-times " \\\* " 3 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " / " 0 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
Index: gcc/testsuite/gcc.dg/pr44214-2.c
===================================================================
--- gcc/testsuite/gcc.dg/pr44214-2.c	(revision 0)
+++ gcc/testsuite/gcc.dg/pr44214-2.c	(revision 0)
@@ -0,0 +1,16 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -fdump-tree-optimized" } */
+
+void do_div (_Complex double *a, _Complex double *b)
+{
+  *a = *b / (4.0 - 5.0fi);
+}
+
+/* Constant folding should multiply *b by the reciprocal of 4-5i
+   = 4/41 - (5/41)i.  As a result there should be 4 multiplies and
+   zero divides in the optimized code.  The string " * " occurs 6
+   times: 4 multiplies and 2 indirect parameters.  */
+
+/* { dg-final { scan-tree-dump-times " \\\* " 6 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " / " 0 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */