diff mbox series

[1/3] Perform fold when propagating.

Message ID 20190813083326.10833-2-rdapp@linux.ibm.com
State New
Headers show
Series Simplify wrapped binops. | expand

Commit Message

Robin Dapp Aug. 13, 2019, 8:33 a.m. UTC
This patch performs more aggressive folding in order for the
match.pd changes to kick in later.

Some test cases rely on VRP doing something which now already
happens during CCP so adjust them accordingly.

Also, the loop versioning pass was missing one case when
deconstructing addresses that would only be triggered after
this patch for me:
It could handle addition and subsequent convert/nop but not
a convert/nop directly.  This would cause the hash to be
calculated differently and, in turn, cause the pass to miss
a versioning opportunity.  Fixed this by adding the missing
case.
---
 gcc/fortran/trans-intrinsic.c                  | 5 +++--
 gcc/gimple-loop-versioning.cc                  | 6 ++++++
 gcc/testsuite/g++.dg/tree-ssa/ssa-dse-2.C      | 4 +++-
 gcc/testsuite/gcc.dg/pr35691-1.c               | 4 ++--
 gcc/testsuite/gcc.dg/pr35691-2.c               | 4 ++--
 gcc/testsuite/gcc.dg/pr35691-3.c               | 4 ++--
 gcc/testsuite/gcc.dg/pr35691-4.c               | 4 ++--
 gcc/testsuite/gcc.dg/tree-ssa/20040305-1.c     | 4 ++--
 gcc/testsuite/gcc.dg/tree-ssa/bit-assoc.c      | 4 ++--
 gcc/testsuite/gcc.dg/tree-ssa/forwprop-16.c    | 4 ++--
 gcc/testsuite/gcc.dg/tree-ssa/loop-15.c        | 2 +-
 gcc/testsuite/gcc.dg/tree-ssa/pr23744.c        | 4 ++--
 gcc/testsuite/gcc.dg/tree-ssa/pr52631.c        | 4 ++--
 gcc/testsuite/gcc.dg/vect/vect-over-widen-23.c | 2 +-
 gcc/tree-ssa-propagate.c                       | 6 ++++--
 15 files changed, 36 insertions(+), 25 deletions(-)

Comments

Richard Biener Aug. 13, 2019, 9:30 a.m. UTC | #1
On Tue, Aug 13, 2019 at 10:36 AM Robin Dapp <rdapp@linux.ibm.com> wrote:
>
> This patch performs more aggressive folding in order for the
> match.pd changes to kick in later.
>
> Some test cases rely on VRP doing something which now already
> happens during CCP so adjust them accordingly.
>
> Also, the loop versioning pass was missing one case when
> deconstructing addresses that would only be triggered after
> this patch for me:
> It could handle addition and subsequent convert/nop but not
> a convert/nop directly.  This would cause the hash to be
> calculated differently and, in turn, cause the pass to miss
> a versioning opportunity.  Fixed this by adding the missing
> case.
> ---
>  gcc/fortran/trans-intrinsic.c                  | 5 +++--
>  gcc/gimple-loop-versioning.cc                  | 6 ++++++
>  gcc/testsuite/g++.dg/tree-ssa/ssa-dse-2.C      | 4 +++-
>  gcc/testsuite/gcc.dg/pr35691-1.c               | 4 ++--
>  gcc/testsuite/gcc.dg/pr35691-2.c               | 4 ++--
>  gcc/testsuite/gcc.dg/pr35691-3.c               | 4 ++--
>  gcc/testsuite/gcc.dg/pr35691-4.c               | 4 ++--
>  gcc/testsuite/gcc.dg/tree-ssa/20040305-1.c     | 4 ++--
>  gcc/testsuite/gcc.dg/tree-ssa/bit-assoc.c      | 4 ++--
>  gcc/testsuite/gcc.dg/tree-ssa/forwprop-16.c    | 4 ++--
>  gcc/testsuite/gcc.dg/tree-ssa/loop-15.c        | 2 +-
>  gcc/testsuite/gcc.dg/tree-ssa/pr23744.c        | 4 ++--
>  gcc/testsuite/gcc.dg/tree-ssa/pr52631.c        | 4 ++--
>  gcc/testsuite/gcc.dg/vect/vect-over-widen-23.c | 2 +-
>  gcc/tree-ssa-propagate.c                       | 6 ++++--
>  15 files changed, 36 insertions(+), 25 deletions(-)
>
> diff --git a/gcc/fortran/trans-intrinsic.c b/gcc/fortran/trans-intrinsic.c
> index a6e33833680..99ec5f34319 100644
> --- a/gcc/fortran/trans-intrinsic.c
> +++ b/gcc/fortran/trans-intrinsic.c
> @@ -5428,8 +5428,9 @@ gfc_conv_intrinsic_findloc (gfc_se *se, gfc_expr *expr)
>    tree type;
>    tree tmp;
>    tree found;
> -  tree forward_branch;
> -  tree back_branch;
> +  /* Initialize here to avoid 'maybe used uninitialized'.  */
> +  tree forward_branch = NULL_TREE;
> +  tree back_branch = NULL_TREE;
>    gfc_loopinfo loop;
>    gfc_ss *arrayss;
>    gfc_ss *maskss;
> diff --git a/gcc/gimple-loop-versioning.cc b/gcc/gimple-loop-versioning.cc
> index 8fa19488490..36c70543402 100644
> --- a/gcc/gimple-loop-versioning.cc
> +++ b/gcc/gimple-loop-versioning.cc
> @@ -1266,6 +1266,12 @@ loop_versioning::record_address_fragment (gimple *stmt,
>                   continue;
>                 }
>             }
> +         if (code == NOP_EXPR)

CONVERT_EXPR_CODE_P (code)

> +           {
> +             tree op1 = gimple_assign_rhs1 (assign);
> +             address->terms[i].expr = strip_casts (op1);
> +             continue;
> +           }
>         }
>        i += 1;
>      }
> diff --git a/gcc/testsuite/g++.dg/tree-ssa/ssa-dse-2.C b/gcc/testsuite/g++.dg/tree-ssa/ssa-dse-2.C
> index ca7e18f66a9..83a8f0668df 100644
> --- a/gcc/testsuite/g++.dg/tree-ssa/ssa-dse-2.C
> +++ b/gcc/testsuite/g++.dg/tree-ssa/ssa-dse-2.C
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -fdump-tree-dse2-details -Wno-return-type" } */
> +/* { dg-options "-O2 -fdump-tree-dse2-details -fdump-tree-vrp1-details -Wno-return-type" } */
>
>  typedef __SIZE_TYPE__ size_t;
>  extern "C"
> @@ -55,3 +55,5 @@ fill_vec_av_set (av_set_t av)
>
>  /* { dg-final { scan-tree-dump-not "Trimming statement .head = -" "dse2" } } */
>  /* { dg-final { scan-tree-dump-not "mem\[^\r\n\]*, 0\\);" "dse2" } } */
> +/* { dg-final { scan-tree-dump "Folded into: GIMPLE_NOP" "vrp1" } } */
> +/* { dg-final { scan-tree-dump-not "memmove" "dse2" } } */  }
> diff --git a/gcc/testsuite/gcc.dg/pr35691-1.c b/gcc/testsuite/gcc.dg/pr35691-1.c
> index 34dc02ab560..2e1d8bd0f43 100644
> --- a/gcc/testsuite/gcc.dg/pr35691-1.c
> +++ b/gcc/testsuite/gcc.dg/pr35691-1.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -fdump-tree-forwprop1-details" } */
> +/* { dg-options "-O2 -fdump-tree-ccp1-details" } */

Hmm.  So this shows that while CCP doesn't produce anything
interesting in the lattice we still substitute (and now fold) everything.
The immediate next pass is forwprop so we're doing everything twice.

In total we have 3 CCP passes, 5 copy-prop and two VRP passes.

So we're re-folding everything an additional 10 times with the patch.
I know I suggested this - eh - but the number looks a bit excessive.
Mostly due to the fact we have so many of these passes.

May I suggest to add a parameter to the substitute-and-fold engine
so we can do the folding on all stmts only when enabled and enable
it just for VRP?  That also avoids the testsuite noise.

I think it's also only necessary to fold a stmt when a (indirect) use
after substitution has either been folded or has (new) SSA name
info (range/known-bits) set?

Thanks,
Richard.

>  int foo(int z0, unsigned z1)
>  {
> @@ -9,4 +9,4 @@ int foo(int z0, unsigned z1)
>    return t2;
>  }
>
> -/* { dg-final { scan-tree-dump "gimple_simplified to _\[0-9\]* = \\(int\\) z1_\[0-9\]*\\(D\\);" "forwprop1" } } */
> +/* { dg-final { scan-tree-dump "gimple_simplified to _\[0-9\]* = \\(int\\) z1_\[0-9\]*\\(D\\);" "ccp1" } } */
> diff --git a/gcc/testsuite/gcc.dg/pr35691-2.c b/gcc/testsuite/gcc.dg/pr35691-2.c
> index b89ce481c6c..d5e335bb72a 100644
> --- a/gcc/testsuite/gcc.dg/pr35691-2.c
> +++ b/gcc/testsuite/gcc.dg/pr35691-2.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -fdump-tree-forwprop1-details" } */
> +/* { dg-options "-O2 -fdump-tree-ccp1-details" } */
>
>  int foo(int z0, unsigned z1)
>  {
> @@ -9,4 +9,4 @@ int foo(int z0, unsigned z1)
>    return t2;
>  }
>
> -/* { dg-final { scan-tree-dump "gimple_simplified to _\[0-9\]* = \\(int\\) z1_\[0-9\]*\\(D\\);" "forwprop1" } } */
> +/* { dg-final { scan-tree-dump "gimple_simplified to _\[0-9\]* = \\(int\\) z1_\[0-9\]*\\(D\\);" "ccp1" } } */
> diff --git a/gcc/testsuite/gcc.dg/pr35691-3.c b/gcc/testsuite/gcc.dg/pr35691-3.c
> index 75b49a6e2ca..a1896e47b59 100644
> --- a/gcc/testsuite/gcc.dg/pr35691-3.c
> +++ b/gcc/testsuite/gcc.dg/pr35691-3.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -fdump-tree-forwprop1-details" } */
> +/* { dg-options "-O2 -fdump-tree-ccp1-details" } */
>
>  int foo(int z0, unsigned z1)
>  {
> @@ -9,4 +9,4 @@ int foo(int z0, unsigned z1)
>    return t2;
>  }
>
> -/* { dg-final { scan-tree-dump "gimple_simplified to _\[0-9\]* = \\(int\\) z1_\[0-9\]*\\(D\\);" "forwprop1" } } */
> +/* { dg-final { scan-tree-dump "gimple_simplified to _\[0-9\]* = \\(int\\) z1_\[0-9\]*\\(D\\);" "ccp1" } } */
> diff --git a/gcc/testsuite/gcc.dg/pr35691-4.c b/gcc/testsuite/gcc.dg/pr35691-4.c
> index 2d9456bd16f..58f28aebe69 100644
> --- a/gcc/testsuite/gcc.dg/pr35691-4.c
> +++ b/gcc/testsuite/gcc.dg/pr35691-4.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -fdump-tree-forwprop1-details" } */
> +/* { dg-options "-O2 -fdump-tree-ccp1-details" } */
>
>  int foo(int z0, unsigned z1)
>  {
> @@ -9,4 +9,4 @@ int foo(int z0, unsigned z1)
>    return t2;
>  }
>
> -/* { dg-final { scan-tree-dump "gimple_simplified to _\[0-9\]* = \\(int\\) z1_\[0-9\]*\\(D\\);" "forwprop1" } } */
> +/* { dg-final { scan-tree-dump "gimple_simplified to _\[0-9\]* = \\(int\\) z1_\[0-9\]*\\(D\\);" "ccp1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/20040305-1.c b/gcc/testsuite/gcc.dg/tree-ssa/20040305-1.c
> index 10935293476..17c2a7e2d3e 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/20040305-1.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/20040305-1.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -fdump-tree-dce2 -fdump-tree-forwprop1-details" } */
> +/* { dg-options "-O2 -fdump-tree-dce2 -fdump-tree-ccp1-details" } */
>
>  int abarney[2];
>  int afred[1];
> @@ -23,7 +23,7 @@ void foo(int edx, int eax)
>
>
>  /* Verify that we did a forward propagation.  */
> -/* { dg-final { scan-tree-dump-times "gimple_simplified" 1 "forwprop1"} } */
> +/* { dg-final { scan-tree-dump-times "gimple_simplified" 1 "ccp1"} } */
>
>  /* After dce we should have two IF statements remaining as the other
>     two tests can be threaded.  */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bit-assoc.c b/gcc/testsuite/gcc.dg/tree-ssa/bit-assoc.c
> index b563f5e404c..6890d0067b6 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/bit-assoc.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/bit-assoc.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O -fdump-tree-forwprop1-details -fdump-tree-ccp1-details" } */
> +/* { dg-options "-O -fdump-tree-ccp1-details" } */
>
>  int f1(int a, int b){
>    int c = a & b;
> @@ -26,4 +26,4 @@ int g3(int a, int b, int c){
>  }
>
>  /* { dg-final { scan-tree-dump-times "Match-and-simplified" 2 "ccp1" } } */
> -/* { dg-final { scan-tree-dump-times "gimple_simplified" 3 "forwprop1" } } */
> +/* { dg-final { scan-tree-dump-times "gimple_simplified" 3 "ccp1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-16.c b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-16.c
> index 1bef7e7a31f..bef76e597dd 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-16.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-16.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O -fdump-tree-forwprop1" } */
> +/* { dg-options "-O -fdump-tree-forwprop2" } */
>
>  int foo (double xx, double xy)
>  {
> @@ -10,4 +10,4 @@ int foo (double xx, double xy)
>    return 2;
>  }
>
> -/* { dg-final { scan-tree-dump "if \\\(x" "forwprop1" } } */
> +/* { dg-final { scan-tree-dump "if \\\(x" "forwprop2" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c
> index b437518487d..dce6ad57a04 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c
> @@ -19,7 +19,7 @@ int bla(void)
>  }
>
>  /* Since the loop is removed, there should be no addition.  */
> -/* { dg-final { scan-tree-dump-times " \\+ " 0 "optimized" { xfail *-*-* } } } */
> +/* { dg-final { scan-tree-dump-times " \\+ " 0 "optimized" } } */
>  /* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */
>
>  /* The if from the loop header copying remains in the code.  */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr23744.c b/gcc/testsuite/gcc.dg/tree-ssa/pr23744.c
> index 3385aa1e424..ba3fda352ca 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr23744.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr23744.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -fno-tree-ccp -fdisable-tree-evrp -fdump-tree-vrp1" } */
> +/* { dg-options "-O2 -fno-tree-ccp -fdisable-tree-evrp -fdump-tree-vrp1-details" } */
>
>  void h (void);
>
> @@ -17,4 +17,4 @@ int g (int i, int j)
>      return 1;
>  }
>
> -/* { dg-final { scan-tree-dump-times "Folding predicate.*to 1" 1 "vrp1" } } */
> +/* { dg-final { scan-tree-dump-times "gimple_simplified" 1 "vrp1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr52631.c b/gcc/testsuite/gcc.dg/tree-ssa/pr52631.c
> index c18a5d570b4..2e99d112057 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr52631.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr52631.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -fno-tree-forwprop -fdump-tree-fre1-details" } */
> +/* { dg-options "-O2 -fno-tree-forwprop -fdump-tree-ccp1-details" } */
>
>  unsigned f(unsigned a)
>  {
> @@ -12,6 +12,6 @@ unsigned f(unsigned a)
>  }
>
>  /* We want to verify that we replace the b & 1 with b.  */
> -/* { dg-final { scan-tree-dump-times "Replaced b_\[0-9\]+ & 1 with b_\[0-9\]+ in" 1 "fre1"} } */
> +/* { dg-final { scan-tree-dump-times "Match-and-simplified b_\[0-9\]+ & 1 to b_\[0-9\]+" 1 "ccp1"} } */
>
>
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-over-widen-23.c b/gcc/testsuite/gcc.dg/vect/vect-over-widen-23.c
> index 34ce512f372..e2e0d6cb8bc 100644
> --- a/gcc/testsuite/gcc.dg/vect/vect-over-widen-23.c
> +++ b/gcc/testsuite/gcc.dg/vect/vect-over-widen-23.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-additional-options "-fno-tree-forwprop -fno-tree-vrp" }
> +/* { dg-additional-options "-fno-tree-forwprop -fno-tree-vrp -fno-tree-ccp -fno-tree-copy-prop" } */
>  /* { dg-require-effective-target vect_int } */
>
>  #include "tree-vect.h"
> diff --git a/gcc/tree-ssa-propagate.c b/gcc/tree-ssa-propagate.c
> index 0862f83e9a1..7a8f1e037b0 100644
> --- a/gcc/tree-ssa-propagate.c
> +++ b/gcc/tree-ssa-propagate.c
> @@ -1064,10 +1064,12 @@ substitute_and_fold_dom_walker::before_dom_children (basic_block bb)
>        /* Replace real uses in the statement.  */
>        did_replace |= substitute_and_fold_engine->replace_uses_in (stmt);
>
> -      /* If we made a replacement, fold the statement.  */
>        if (did_replace)
> +         gimple_set_modified (stmt, true);
> +
> +      if (fold_stmt (&i, follow_single_use_edges))
>         {
> -         fold_stmt (&i, follow_single_use_edges);
> +         did_replace = true;
>           stmt = gsi_stmt (i);
>           gimple_set_modified (stmt, true);
>         }
> --
> 2.17.0
>
Robin Dapp Aug. 13, 2019, 11:23 a.m. UTC | #2
> May I suggest to add a parameter to the substitute-and-fold engine
> so we can do the folding on all stmts only when enabled and enable
> it just for VRP?  That also avoids the testsuite noise.

Would something along these lines do?

diff --git a/gcc/tree-ssa-propagate.c b/gcc/tree-ssa-propagate.c
index 7a8f1e037b0..6c0d743b823 100644
--- a/gcc/tree-ssa-propagate.c
+++ b/gcc/tree-ssa-propagate.c
@@ -814,7 +814,6 @@ ssa_propagation_engine::ssa_propagate (void)
   ssa_prop_fini ();
 }

-
 /* Return true if STMT is of the form 'mem_ref = RHS', where 'mem_ref'
    is a non-volatile pointer dereference, a structure reference or a
    reference to a single _DECL.  Ignore volatile memory references
@@ -1064,11 +1063,10 @@
substitute_and_fold_dom_walker::before_dom_children (basic_block bb)
       /* Replace real uses in the statement.  */
       did_replace |= substitute_and_fold_engine->replace_uses_in (stmt);

-      if (did_replace)
-	  gimple_set_modified (stmt, true);
-
-      if (fold_stmt (&i, follow_single_use_edges))
+      /* If we made a replacement, fold the statement.  */
+      if (did_replace ||
substitute_and_fold_engine->should_fold_all_stmts ())
 	{
+	  fold_stmt (&i, follow_single_use_edges);
 	  did_replace = true;
 	  stmt = gsi_stmt (i);
 	  gimple_set_modified (stmt, true);
diff --git a/gcc/tree-ssa-propagate.h b/gcc/tree-ssa-propagate.h
index 81b635e0787..939680f487c 100644
--- a/gcc/tree-ssa-propagate.h
+++ b/gcc/tree-ssa-propagate.h
@@ -107,6 +107,13 @@ class substitute_and_fold_engine
   bool substitute_and_fold (basic_block = NULL);
   bool replace_uses_in (gimple *);
   bool replace_phi_args_in (gphi *);
+
+  /* Users like VRP can overwrite this when they want to perform
+     folding for every propagation.  */
+  virtual bool should_fold_all_stmts (void)
+    {
+      return false;
+    }
 };

 #endif /* _TREE_SSA_PROPAGATE_H  */
diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index e2850682da2..8c8fa6f2bec 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -6271,6 +6271,9 @@ class vrp_folder : public substitute_and_fold_engine
     { return vr_values->simplify_stmt_using_ranges (gsi); }
  tree op_with_constant_singleton_value_range (tree op)
     { return vr_values->op_with_constant_singleton_value_range (op); }
+
+  /* Enable aggressive folding in every propagation.  */
+  bool should_fold_all_stmts (void) { return true; }
 };

 /* If the statement pointed by SI has a predicate whose value can be


> I think it's also only necessary to fold a stmt when a (indirect) use
> after substitution has either been folded or has (new) SSA name
> info (range/known-bits) set?

Where would this need to be changed?

Regards
 Robin
Richard Biener Aug. 14, 2019, 9:36 a.m. UTC | #3
On Tue, Aug 13, 2019 at 1:24 PM Robin Dapp <rdapp@linux.ibm.com> wrote:
>
> > May I suggest to add a parameter to the substitute-and-fold engine
> > so we can do the folding on all stmts only when enabled and enable
> > it just for VRP?  That also avoids the testsuite noise.
>
> Would something along these lines do?
>
> diff --git a/gcc/tree-ssa-propagate.c b/gcc/tree-ssa-propagate.c
> index 7a8f1e037b0..6c0d743b823 100644
> --- a/gcc/tree-ssa-propagate.c
> +++ b/gcc/tree-ssa-propagate.c
> @@ -814,7 +814,6 @@ ssa_propagation_engine::ssa_propagate (void)
>    ssa_prop_fini ();
>  }
>
> -
>  /* Return true if STMT is of the form 'mem_ref = RHS', where 'mem_ref'
>     is a non-volatile pointer dereference, a structure reference or a
>     reference to a single _DECL.  Ignore volatile memory references
> @@ -1064,11 +1063,10 @@
> substitute_and_fold_dom_walker::before_dom_children (basic_block bb)
>        /* Replace real uses in the statement.  */
>        did_replace |= substitute_and_fold_engine->replace_uses_in (stmt);
>
> -      if (did_replace)
> -         gimple_set_modified (stmt, true);
> -
> -      if (fold_stmt (&i, follow_single_use_edges))
> +      /* If we made a replacement, fold the statement.  */
> +      if (did_replace ||
> substitute_and_fold_engine->should_fold_all_stmts ())
>         {
> +         fold_stmt (&i, follow_single_use_edges);
>           did_replace = true;
>           stmt = gsi_stmt (i);
>           gimple_set_modified (stmt, true);
> diff --git a/gcc/tree-ssa-propagate.h b/gcc/tree-ssa-propagate.h
> index 81b635e0787..939680f487c 100644
> --- a/gcc/tree-ssa-propagate.h
> +++ b/gcc/tree-ssa-propagate.h
> @@ -107,6 +107,13 @@ class substitute_and_fold_engine
>    bool substitute_and_fold (basic_block = NULL);
>    bool replace_uses_in (gimple *);
>    bool replace_phi_args_in (gphi *);
> +
> +  /* Users like VRP can overwrite this when they want to perform
> +     folding for every propagation.  */
> +  virtual bool should_fold_all_stmts (void)
> +    {
> +      return false;
> +    }

Since this is constant for a single invocation I'd either
add a flag param to substitute_and_fold or a bool
class member initialized at construction time.

Also do

  if ((did_replace || fold_all_stmts)
     && fold_stmt (...))
   {
   }

to avoid extra work when folding does nothing.

Otherwise yes, this woudl work.

>  };
>
>  #endif /* _TREE_SSA_PROPAGATE_H  */
> diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
> index e2850682da2..8c8fa6f2bec 100644
> --- a/gcc/tree-vrp.c
> +++ b/gcc/tree-vrp.c
> @@ -6271,6 +6271,9 @@ class vrp_folder : public substitute_and_fold_engine
>      { return vr_values->simplify_stmt_using_ranges (gsi); }
>   tree op_with_constant_singleton_value_range (tree op)
>      { return vr_values->op_with_constant_singleton_value_range (op); }
> +
> +  /* Enable aggressive folding in every propagation.  */
> +  bool should_fold_all_stmts (void) { return true; }
>  };
>
>  /* If the statement pointed by SI has a predicate whose value can be
>
>
> > I think it's also only necessary to fold a stmt when a (indirect) use
> > after substitution has either been folded or has (new) SSA name
> > info (range/known-bits) set?
>
> Where would this need to be changed?

It was just a random thought, doing this would need to keep track
of "changed" SSA names (in a bitmap?) and before folding
checking all uses on the stmt if they are "changed".  The overhead
for this may be higher than the folding savings we get.  The "changed"
would also need to tickle down some distance so patterns with
deeper nested subexpressions would be tried.

Richard.

>
> Regards
>  Robin
>
diff mbox series

Patch

diff --git a/gcc/fortran/trans-intrinsic.c b/gcc/fortran/trans-intrinsic.c
index a6e33833680..99ec5f34319 100644
--- a/gcc/fortran/trans-intrinsic.c
+++ b/gcc/fortran/trans-intrinsic.c
@@ -5428,8 +5428,9 @@  gfc_conv_intrinsic_findloc (gfc_se *se, gfc_expr *expr)
   tree type;
   tree tmp;
   tree found;
-  tree forward_branch;
-  tree back_branch;
+  /* Initialize here to avoid 'maybe used uninitialized'.  */
+  tree forward_branch = NULL_TREE;
+  tree back_branch = NULL_TREE;
   gfc_loopinfo loop;
   gfc_ss *arrayss;
   gfc_ss *maskss;
diff --git a/gcc/gimple-loop-versioning.cc b/gcc/gimple-loop-versioning.cc
index 8fa19488490..36c70543402 100644
--- a/gcc/gimple-loop-versioning.cc
+++ b/gcc/gimple-loop-versioning.cc
@@ -1266,6 +1266,12 @@  loop_versioning::record_address_fragment (gimple *stmt,
 		  continue;
 		}
 	    }
+	  if (code == NOP_EXPR)
+	    {
+	      tree op1 = gimple_assign_rhs1 (assign);
+	      address->terms[i].expr = strip_casts (op1);
+	      continue;
+	    }
 	}
       i += 1;
     }
diff --git a/gcc/testsuite/g++.dg/tree-ssa/ssa-dse-2.C b/gcc/testsuite/g++.dg/tree-ssa/ssa-dse-2.C
index ca7e18f66a9..83a8f0668df 100644
--- a/gcc/testsuite/g++.dg/tree-ssa/ssa-dse-2.C
+++ b/gcc/testsuite/g++.dg/tree-ssa/ssa-dse-2.C
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-dse2-details -Wno-return-type" } */
+/* { dg-options "-O2 -fdump-tree-dse2-details -fdump-tree-vrp1-details -Wno-return-type" } */
 
 typedef __SIZE_TYPE__ size_t;
 extern "C"
@@ -55,3 +55,5 @@  fill_vec_av_set (av_set_t av)
 
 /* { dg-final { scan-tree-dump-not "Trimming statement .head = -" "dse2" } } */
 /* { dg-final { scan-tree-dump-not "mem\[^\r\n\]*, 0\\);" "dse2" } } */
+/* { dg-final { scan-tree-dump "Folded into: GIMPLE_NOP" "vrp1" } } */
+/* { dg-final { scan-tree-dump-not "memmove" "dse2" } } */  }
diff --git a/gcc/testsuite/gcc.dg/pr35691-1.c b/gcc/testsuite/gcc.dg/pr35691-1.c
index 34dc02ab560..2e1d8bd0f43 100644
--- a/gcc/testsuite/gcc.dg/pr35691-1.c
+++ b/gcc/testsuite/gcc.dg/pr35691-1.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-forwprop1-details" } */
+/* { dg-options "-O2 -fdump-tree-ccp1-details" } */
 
 int foo(int z0, unsigned z1)
 {
@@ -9,4 +9,4 @@  int foo(int z0, unsigned z1)
   return t2;
 }
 
-/* { dg-final { scan-tree-dump "gimple_simplified to _\[0-9\]* = \\(int\\) z1_\[0-9\]*\\(D\\);" "forwprop1" } } */
+/* { dg-final { scan-tree-dump "gimple_simplified to _\[0-9\]* = \\(int\\) z1_\[0-9\]*\\(D\\);" "ccp1" } } */
diff --git a/gcc/testsuite/gcc.dg/pr35691-2.c b/gcc/testsuite/gcc.dg/pr35691-2.c
index b89ce481c6c..d5e335bb72a 100644
--- a/gcc/testsuite/gcc.dg/pr35691-2.c
+++ b/gcc/testsuite/gcc.dg/pr35691-2.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-forwprop1-details" } */
+/* { dg-options "-O2 -fdump-tree-ccp1-details" } */
 
 int foo(int z0, unsigned z1)
 {
@@ -9,4 +9,4 @@  int foo(int z0, unsigned z1)
   return t2;
 }
 
-/* { dg-final { scan-tree-dump "gimple_simplified to _\[0-9\]* = \\(int\\) z1_\[0-9\]*\\(D\\);" "forwprop1" } } */
+/* { dg-final { scan-tree-dump "gimple_simplified to _\[0-9\]* = \\(int\\) z1_\[0-9\]*\\(D\\);" "ccp1" } } */
diff --git a/gcc/testsuite/gcc.dg/pr35691-3.c b/gcc/testsuite/gcc.dg/pr35691-3.c
index 75b49a6e2ca..a1896e47b59 100644
--- a/gcc/testsuite/gcc.dg/pr35691-3.c
+++ b/gcc/testsuite/gcc.dg/pr35691-3.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-forwprop1-details" } */
+/* { dg-options "-O2 -fdump-tree-ccp1-details" } */
 
 int foo(int z0, unsigned z1)
 {
@@ -9,4 +9,4 @@  int foo(int z0, unsigned z1)
   return t2;
 }
 
-/* { dg-final { scan-tree-dump "gimple_simplified to _\[0-9\]* = \\(int\\) z1_\[0-9\]*\\(D\\);" "forwprop1" } } */
+/* { dg-final { scan-tree-dump "gimple_simplified to _\[0-9\]* = \\(int\\) z1_\[0-9\]*\\(D\\);" "ccp1" } } */
diff --git a/gcc/testsuite/gcc.dg/pr35691-4.c b/gcc/testsuite/gcc.dg/pr35691-4.c
index 2d9456bd16f..58f28aebe69 100644
--- a/gcc/testsuite/gcc.dg/pr35691-4.c
+++ b/gcc/testsuite/gcc.dg/pr35691-4.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-forwprop1-details" } */
+/* { dg-options "-O2 -fdump-tree-ccp1-details" } */
 
 int foo(int z0, unsigned z1)
 {
@@ -9,4 +9,4 @@  int foo(int z0, unsigned z1)
   return t2;
 }
 
-/* { dg-final { scan-tree-dump "gimple_simplified to _\[0-9\]* = \\(int\\) z1_\[0-9\]*\\(D\\);" "forwprop1" } } */
+/* { dg-final { scan-tree-dump "gimple_simplified to _\[0-9\]* = \\(int\\) z1_\[0-9\]*\\(D\\);" "ccp1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/20040305-1.c b/gcc/testsuite/gcc.dg/tree-ssa/20040305-1.c
index 10935293476..17c2a7e2d3e 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/20040305-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/20040305-1.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-dce2 -fdump-tree-forwprop1-details" } */
+/* { dg-options "-O2 -fdump-tree-dce2 -fdump-tree-ccp1-details" } */
   
 int abarney[2];
 int afred[1];
@@ -23,7 +23,7 @@  void foo(int edx, int eax)
  
 
 /* Verify that we did a forward propagation.  */
-/* { dg-final { scan-tree-dump-times "gimple_simplified" 1 "forwprop1"} } */
+/* { dg-final { scan-tree-dump-times "gimple_simplified" 1 "ccp1"} } */
 
 /* After dce we should have two IF statements remaining as the other
    two tests can be threaded.  */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bit-assoc.c b/gcc/testsuite/gcc.dg/tree-ssa/bit-assoc.c
index b563f5e404c..6890d0067b6 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/bit-assoc.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/bit-assoc.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O -fdump-tree-forwprop1-details -fdump-tree-ccp1-details" } */
+/* { dg-options "-O -fdump-tree-ccp1-details" } */
 
 int f1(int a, int b){
   int c = a & b;
@@ -26,4 +26,4 @@  int g3(int a, int b, int c){
 }
 
 /* { dg-final { scan-tree-dump-times "Match-and-simplified" 2 "ccp1" } } */
-/* { dg-final { scan-tree-dump-times "gimple_simplified" 3 "forwprop1" } } */
+/* { dg-final { scan-tree-dump-times "gimple_simplified" 3 "ccp1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-16.c b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-16.c
index 1bef7e7a31f..bef76e597dd 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-16.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-16.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O -fdump-tree-forwprop1" } */
+/* { dg-options "-O -fdump-tree-forwprop2" } */
 
 int foo (double xx, double xy)
 {
@@ -10,4 +10,4 @@  int foo (double xx, double xy)
   return 2;
 }
 
-/* { dg-final { scan-tree-dump "if \\\(x" "forwprop1" } } */
+/* { dg-final { scan-tree-dump "if \\\(x" "forwprop2" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c
index b437518487d..dce6ad57a04 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c
@@ -19,7 +19,7 @@  int bla(void)
 }
 
 /* Since the loop is removed, there should be no addition.  */
-/* { dg-final { scan-tree-dump-times " \\+ " 0 "optimized" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times " \\+ " 0 "optimized" } } */
 /* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */
 
 /* The if from the loop header copying remains in the code.  */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr23744.c b/gcc/testsuite/gcc.dg/tree-ssa/pr23744.c
index 3385aa1e424..ba3fda352ca 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr23744.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr23744.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -fno-tree-ccp -fdisable-tree-evrp -fdump-tree-vrp1" } */
+/* { dg-options "-O2 -fno-tree-ccp -fdisable-tree-evrp -fdump-tree-vrp1-details" } */
 
 void h (void);
 
@@ -17,4 +17,4 @@  int g (int i, int j)
     return 1;
 }
 
-/* { dg-final { scan-tree-dump-times "Folding predicate.*to 1" 1 "vrp1" } } */
+/* { dg-final { scan-tree-dump-times "gimple_simplified" 1 "vrp1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr52631.c b/gcc/testsuite/gcc.dg/tree-ssa/pr52631.c
index c18a5d570b4..2e99d112057 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr52631.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr52631.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -fno-tree-forwprop -fdump-tree-fre1-details" } */
+/* { dg-options "-O2 -fno-tree-forwprop -fdump-tree-ccp1-details" } */
 
 unsigned f(unsigned a)
 {
@@ -12,6 +12,6 @@  unsigned f(unsigned a)
 }
 
 /* We want to verify that we replace the b & 1 with b.  */
-/* { dg-final { scan-tree-dump-times "Replaced b_\[0-9\]+ & 1 with b_\[0-9\]+ in" 1 "fre1"} } */
+/* { dg-final { scan-tree-dump-times "Match-and-simplified b_\[0-9\]+ & 1 to b_\[0-9\]+" 1 "ccp1"} } */
  
 
diff --git a/gcc/testsuite/gcc.dg/vect/vect-over-widen-23.c b/gcc/testsuite/gcc.dg/vect/vect-over-widen-23.c
index 34ce512f372..e2e0d6cb8bc 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-over-widen-23.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-over-widen-23.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-additional-options "-fno-tree-forwprop -fno-tree-vrp" }
+/* { dg-additional-options "-fno-tree-forwprop -fno-tree-vrp -fno-tree-ccp -fno-tree-copy-prop" } */
 /* { dg-require-effective-target vect_int } */
 
 #include "tree-vect.h"
diff --git a/gcc/tree-ssa-propagate.c b/gcc/tree-ssa-propagate.c
index 0862f83e9a1..7a8f1e037b0 100644
--- a/gcc/tree-ssa-propagate.c
+++ b/gcc/tree-ssa-propagate.c
@@ -1064,10 +1064,12 @@  substitute_and_fold_dom_walker::before_dom_children (basic_block bb)
       /* Replace real uses in the statement.  */
       did_replace |= substitute_and_fold_engine->replace_uses_in (stmt);
 
-      /* If we made a replacement, fold the statement.  */
       if (did_replace)
+	  gimple_set_modified (stmt, true);
+
+      if (fold_stmt (&i, follow_single_use_edges))
 	{
-	  fold_stmt (&i, follow_single_use_edges);
+	  did_replace = true;
 	  stmt = gsi_stmt (i);
 	  gimple_set_modified (stmt, true);
 	}