diff mbox

[RFC] Fix P1 PR77498

Message ID alpine.LSU.2.20.1703291147460.30051@zhemvz.fhfr.qr
State New
Headers show

Commit Message

Richard Biener March 29, 2017, 10:05 a.m. UTC
After quite some pondering over this and other related bugs I propose
the following for GCC 7 which tames down PRE a bit (back to levels
of GCC 6).  Technically it's the wrong place to fix this, we do
have measures in place during elimination but they are not in effect
at -O2.  For GCC 8 I'd like to be more aggressive there but that
would require to enable predictive commoning at -O2 (with some
limits to its unrolling) to not lose optimization opportunities.

The other option is to ignore this issue and postpone the solution
to GCC 8.

Bootstrapped / tested on x86_64-unknown-linux-gnu.

Any preference?

Thanks,
Richard.

2017-03-29  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/77498
	* tree-ssa-pre.c (phi_translate_1): Do not allow simplifications
	to non-constants over backedges.

	* gfortran.dg/pr77498.f: New testcase.

Index: gcc/tree-ssa-pre.c
===================================================================
*** gcc/tree-ssa-pre.c	(revision 246026)
--- gcc/tree-ssa-pre.c	(working copy)
*************** phi_translate_1 (pre_expr expr, bitmap_s
*** 1468,1477 ****
  		   leader for it.  */
  		if (constant->kind != CONSTANT)
  		  {
! 		    unsigned value_id = get_expr_value_id (constant);
! 		    constant = find_leader_in_sets (value_id, set1, set2);
! 		    if (constant)
! 		      return constant;
  		  }
  		else
  		  return constant;
--- 1468,1487 ----
  		   leader for it.  */
  		if (constant->kind != CONSTANT)
  		  {
! 		    /* Do not allow simplifications to non-constants over
! 		       backedges as this will likely result in a loop PHI node
! 		       to be inserted and increased register pressure.
! 		       See PR77498 - this avoids doing predcoms work in
! 		       a less efficient way.  */
! 		    if (find_edge (pred, phiblock)->flags & EDGE_DFS_BACK)
! 		      ;
! 		    else
! 		      {
! 			unsigned value_id = get_expr_value_id (constant);
! 			constant = find_leader_in_sets (value_id, set1, set2);
! 			if (constant)
! 			  return constant;
! 		      }
  		  }
  		else
  		  return constant;

Comments

Bin.Cheng March 29, 2017, 3:43 p.m. UTC | #1
On Wed, Mar 29, 2017 at 11:05 AM, Richard Biener <rguenther@suse.de> wrote:
>
> After quite some pondering over this and other related bugs I propose
> the following for GCC 7 which tames down PRE a bit (back to levels
> of GCC 6).  Technically it's the wrong place to fix this, we do
> have measures in place during elimination but they are not in effect
> at -O2.  For GCC 8 I'd like to be more aggressive there but that
> would require to enable predictive commoning at -O2 (with some
> limits to its unrolling) to not lose optimization opportunities.
>
> The other option is to ignore this issue and postpone the solution
> to GCC 8.
>
> Bootstrapped / tested on x86_64-unknown-linux-gnu.
>
> Any preference?
+1 enabling predcom at O2 for this kind of transformation, remember I
once suggested that.

Thanks,
bin
>
> Thanks,
> Richard.
>
> 2017-03-29  Richard Biener  <rguenther@suse.de>
>
>         PR tree-optimization/77498
>         * tree-ssa-pre.c (phi_translate_1): Do not allow simplifications
>         to non-constants over backedges.
>
>         * gfortran.dg/pr77498.f: New testcase.
>
> Index: gcc/tree-ssa-pre.c
> ===================================================================
> *** gcc/tree-ssa-pre.c  (revision 246026)
> --- gcc/tree-ssa-pre.c  (working copy)
> *************** phi_translate_1 (pre_expr expr, bitmap_s
> *** 1468,1477 ****
>                    leader for it.  */
>                 if (constant->kind != CONSTANT)
>                   {
> !                   unsigned value_id = get_expr_value_id (constant);
> !                   constant = find_leader_in_sets (value_id, set1, set2);
> !                   if (constant)
> !                     return constant;
>                   }
>                 else
>                   return constant;
> --- 1468,1487 ----
>                    leader for it.  */
>                 if (constant->kind != CONSTANT)
>                   {
> !                   /* Do not allow simplifications to non-constants over
> !                      backedges as this will likely result in a loop PHI node
> !                      to be inserted and increased register pressure.
> !                      See PR77498 - this avoids doing predcoms work in
> !                      a less efficient way.  */
> !                   if (find_edge (pred, phiblock)->flags & EDGE_DFS_BACK)
> !                     ;
> !                   else
> !                     {
> !                       unsigned value_id = get_expr_value_id (constant);
> !                       constant = find_leader_in_sets (value_id, set1, set2);
> !                       if (constant)
> !                         return constant;
> !                     }
>                   }
>                 else
>                   return constant;
> Index: gcc/testsuite/gfortran.dg/pr77498.f
> ===================================================================
> --- gcc/testsuite/gfortran.dg/pr77498.f (nonexistent)
> +++ gcc/testsuite/gfortran.dg/pr77498.f (working copy)
> @@ -0,0 +1,36 @@
> +! { dg-do compile }
> +! { dg-options "-O2 -ffast-math -fdump-tree-pre" }
> +
> +      subroutine foo(U,V,R,N,A)
> +      integer N
> +      real*8 U(N,N,N),V(N,N,N),R(N,N,N),A(0:3)
> +      integer I3, I2, I1
> +C
> +      do I3=2,N-1
> +       do I2=2,N-1
> +        do I1=2,N-1
> +         R(I1,I2,I3)=V(I1,I2,I3)
> +     *      -A(0)*( U(I1,  I2,  I3  ) )
> +     *      -A(1)*( U(I1-1,I2,  I3  ) + U(I1+1,I2,  I3  )
> +     *                 +  U(I1,  I2-1,I3  ) + U(I1,  I2+1,I3  )
> +     *                 +  U(I1,  I2,  I3-1) + U(I1,  I2,  I3+1) )
> +     *      -A(2)*( U(I1-1,I2-1,I3  ) + U(I1+1,I2-1,I3  )
> +     *                 +  U(I1-1,I2+1,I3  ) + U(I1+1,I2+1,I3  )
> +     *                 +  U(I1,  I2-1,I3-1) + U(I1,  I2+1,I3-1)
> +     *                 +  U(I1,  I2-1,I3+1) + U(I1,  I2+1,I3+1)
> +     *                 +  U(I1-1,I2,  I3-1) + U(I1-1,I2,  I3+1)
> +     *                 +  U(I1+1,I2,  I3-1) + U(I1+1,I2,  I3+1) )
> +     *      -A(3)*( U(I1-1,I2-1,I3-1) + U(I1+1,I2-1,I3-1)
> +     *                 +  U(I1-1,I2+1,I3-1) + U(I1+1,I2+1,I3-1)
> +     *                 +  U(I1-1,I2-1,I3+1) + U(I1+1,I2-1,I3+1)
> +     *                 +  U(I1-1,I2+1,I3+1) + U(I1+1,I2+1,I3+1) )
> +        enddo
> +       enddo
> +      enddo
> +      return
> +      end
> +
> +! PRE shouldn't do predictive commonings job here (and in a bad way)
> +! ???  It still does but not as bad as it could.  Less prephitmps
> +! would be better, pcom does it with 6.
> +! { dg-final { scan-tree-dump-times "# prephitmp" 9 "pre" } }
Jeff Law March 29, 2017, 7:11 p.m. UTC | #2
On 03/29/2017 04:05 AM, Richard Biener wrote:
>
> After quite some pondering over this and other related bugs I propose
> the following for GCC 7 which tames down PRE a bit (back to levels
> of GCC 6).  Technically it's the wrong place to fix this, we do
> have measures in place during elimination but they are not in effect
> at -O2.  For GCC 8 I'd like to be more aggressive there but that
> would require to enable predictive commoning at -O2 (with some
> limits to its unrolling) to not lose optimization opportunities.
>
> The other option is to ignore this issue and postpone the solution
> to GCC 8.
>
> Bootstrapped / tested on x86_64-unknown-linux-gnu.
>
> Any preference?
>
> Thanks,
> Richard.
>
> 2017-03-29  Richard Biener  <rguenther@suse.de>
>
> 	PR tree-optimization/77498
> 	* tree-ssa-pre.c (phi_translate_1): Do not allow simplifications
> 	to non-constants over backedges.
>
> 	* gfortran.dg/pr77498.f: New testcase.
I've got a slight preference for this patch.

If you had a good start on the real fix then I'd lean more towards 
postponing.

jeff
Richard Biener March 30, 2017, 7:13 a.m. UTC | #3
On Wed, 29 Mar 2017, Jeff Law wrote:

> On 03/29/2017 04:05 AM, Richard Biener wrote:
> > 
> > After quite some pondering over this and other related bugs I propose
> > the following for GCC 7 which tames down PRE a bit (back to levels
> > of GCC 6).  Technically it's the wrong place to fix this, we do
> > have measures in place during elimination but they are not in effect
> > at -O2.  For GCC 8 I'd like to be more aggressive there but that
> > would require to enable predictive commoning at -O2 (with some
> > limits to its unrolling) to not lose optimization opportunities.
> > 
> > The other option is to ignore this issue and postpone the solution
> > to GCC 8.
> > 
> > Bootstrapped / tested on x86_64-unknown-linux-gnu.
> > 
> > Any preference?
> > 
> > Thanks,
> > Richard.
> > 
> > 2017-03-29  Richard Biener  <rguenther@suse.de>
> > 
> > 	PR tree-optimization/77498
> > 	* tree-ssa-pre.c (phi_translate_1): Do not allow simplifications
> > 	to non-constants over backedges.
> > 
> > 	* gfortran.dg/pr77498.f: New testcase.
> I've got a slight preference for this patch.
> 
> If you had a good start on the real fix then I'd lean more towards postponing.

I wouldn't it yet call "real fix" but just an idea (where I tested the
PRE side already, with the expected testsuite regressions).  I've done
no benchmarking at all for that.  For the proposed patch the situation
is that we're only going to remove some PRE that was done additionally
over GCC 6 because of the rev. that caused the regression, so I have
confidence that it won't make things worse when comparing to GCC 6.

Thus I've now applied the patch.

Richard.
Christophe Lyon March 31, 2017, 9:01 a.m. UTC | #4
Hi Richard,


On 30 March 2017 at 09:13, Richard Biener <rguenther@suse.de> wrote:
> On Wed, 29 Mar 2017, Jeff Law wrote:
>
>> On 03/29/2017 04:05 AM, Richard Biener wrote:
>> >
>> > After quite some pondering over this and other related bugs I propose
>> > the following for GCC 7 which tames down PRE a bit (back to levels
>> > of GCC 6).  Technically it's the wrong place to fix this, we do
>> > have measures in place during elimination but they are not in effect
>> > at -O2.  For GCC 8 I'd like to be more aggressive there but that
>> > would require to enable predictive commoning at -O2 (with some
>> > limits to its unrolling) to not lose optimization opportunities.
>> >
>> > The other option is to ignore this issue and postpone the solution
>> > to GCC 8.
>> >
>> > Bootstrapped / tested on x86_64-unknown-linux-gnu.
>> >
>> > Any preference?
>> >
>> > Thanks,
>> > Richard.
>> >
>> > 2017-03-29  Richard Biener  <rguenther@suse.de>
>> >
>> >     PR tree-optimization/77498
>> >     * tree-ssa-pre.c (phi_translate_1): Do not allow simplifications
>> >     to non-constants over backedges.
>> >
>> >     * gfortran.dg/pr77498.f: New testcase.
>> I've got a slight preference for this patch.
>>
>> If you had a good start on the real fix then I'd lean more towards postponing.
>
> I wouldn't it yet call "real fix" but just an idea (where I tested the
> PRE side already, with the expected testsuite regressions).  I've done
> no benchmarking at all for that.  For the proposed patch the situation
> is that we're only going to remove some PRE that was done additionally
> over GCC 6 because of the rev. that caused the regression, so I have
> confidence that it won't make things worse when comparing to GCC 6.
>
> Thus I've now applied the patch.
>

With this patch, the following testcase now fails on arm* targets:
gcc.dg/tree-ssa/pr71347.c scan-tree-dump-not optimized ".* = MEM.*;"

I'll have to rebuild manually if you need the dumps, let me know.

Thanks,

Christophe

> Richard.
Rainer Orth March 31, 2017, 9:05 a.m. UTC | #5
Hi Christophe,

> With this patch, the following testcase now fails on arm* targets:
> gcc.dg/tree-ssa/pr71347.c scan-tree-dump-not optimized ".* = MEM.*;"

same on Solaris/SPARC.

	Rainer
Richard Biener March 31, 2017, 9:13 a.m. UTC | #6
On Fri, 31 Mar 2017, Christophe Lyon wrote:

> Hi Richard,
> 
> 
> On 30 March 2017 at 09:13, Richard Biener <rguenther@suse.de> wrote:
> > On Wed, 29 Mar 2017, Jeff Law wrote:
> >
> >> On 03/29/2017 04:05 AM, Richard Biener wrote:
> >> >
> >> > After quite some pondering over this and other related bugs I propose
> >> > the following for GCC 7 which tames down PRE a bit (back to levels
> >> > of GCC 6).  Technically it's the wrong place to fix this, we do
> >> > have measures in place during elimination but they are not in effect
> >> > at -O2.  For GCC 8 I'd like to be more aggressive there but that
> >> > would require to enable predictive commoning at -O2 (with some
> >> > limits to its unrolling) to not lose optimization opportunities.
> >> >
> >> > The other option is to ignore this issue and postpone the solution
> >> > to GCC 8.
> >> >
> >> > Bootstrapped / tested on x86_64-unknown-linux-gnu.
> >> >
> >> > Any preference?
> >> >
> >> > Thanks,
> >> > Richard.
> >> >
> >> > 2017-03-29  Richard Biener  <rguenther@suse.de>
> >> >
> >> >     PR tree-optimization/77498
> >> >     * tree-ssa-pre.c (phi_translate_1): Do not allow simplifications
> >> >     to non-constants over backedges.
> >> >
> >> >     * gfortran.dg/pr77498.f: New testcase.
> >> I've got a slight preference for this patch.
> >>
> >> If you had a good start on the real fix then I'd lean more towards postponing.
> >
> > I wouldn't it yet call "real fix" but just an idea (where I tested the
> > PRE side already, with the expected testsuite regressions).  I've done
> > no benchmarking at all for that.  For the proposed patch the situation
> > is that we're only going to remove some PRE that was done additionally
> > over GCC 6 because of the rev. that caused the regression, so I have
> > confidence that it won't make things worse when comparing to GCC 6.
> >
> > Thus I've now applied the patch.
> >
> 
> With this patch, the following testcase now fails on arm* targets:
> gcc.dg/tree-ssa/pr71347.c scan-tree-dump-not optimized ".* = MEM.*;"
> 
> I'll have to rebuild manually if you need the dumps, let me know.

Ok, there was an XFAIL there which was removed after the PRE enhancement
and thus now needs to be put back.  I'll do that.

Thanks for noticing.

Richard.
diff mbox

Patch

Index: gcc/testsuite/gfortran.dg/pr77498.f
===================================================================
--- gcc/testsuite/gfortran.dg/pr77498.f	(nonexistent)
+++ gcc/testsuite/gfortran.dg/pr77498.f	(working copy)
@@ -0,0 +1,36 @@ 
+! { dg-do compile }
+! { dg-options "-O2 -ffast-math -fdump-tree-pre" }
+
+      subroutine foo(U,V,R,N,A)
+      integer N
+      real*8 U(N,N,N),V(N,N,N),R(N,N,N),A(0:3)
+      integer I3, I2, I1
+C
+      do I3=2,N-1
+       do I2=2,N-1
+        do I1=2,N-1
+         R(I1,I2,I3)=V(I1,I2,I3)
+     *      -A(0)*( U(I1,  I2,  I3  ) )
+     *      -A(1)*( U(I1-1,I2,  I3  ) + U(I1+1,I2,  I3  )
+     *                 +  U(I1,  I2-1,I3  ) + U(I1,  I2+1,I3  )
+     *                 +  U(I1,  I2,  I3-1) + U(I1,  I2,  I3+1) )
+     *      -A(2)*( U(I1-1,I2-1,I3  ) + U(I1+1,I2-1,I3  )
+     *                 +  U(I1-1,I2+1,I3  ) + U(I1+1,I2+1,I3  )
+     *                 +  U(I1,  I2-1,I3-1) + U(I1,  I2+1,I3-1)
+     *                 +  U(I1,  I2-1,I3+1) + U(I1,  I2+1,I3+1)
+     *                 +  U(I1-1,I2,  I3-1) + U(I1-1,I2,  I3+1)
+     *                 +  U(I1+1,I2,  I3-1) + U(I1+1,I2,  I3+1) )
+     *      -A(3)*( U(I1-1,I2-1,I3-1) + U(I1+1,I2-1,I3-1)
+     *                 +  U(I1-1,I2+1,I3-1) + U(I1+1,I2+1,I3-1)
+     *                 +  U(I1-1,I2-1,I3+1) + U(I1+1,I2-1,I3+1)
+     *                 +  U(I1-1,I2+1,I3+1) + U(I1+1,I2+1,I3+1) )
+        enddo
+       enddo
+      enddo
+      return
+      end
+
+! PRE shouldn't do predictive commonings job here (and in a bad way)
+! ???  It still does but not as bad as it could.  Less prephitmps
+! would be better, pcom does it with 6.
+! { dg-final { scan-tree-dump-times "# prephitmp" 9 "pre" } }