diff mbox

Fix profile updating in ifcombine

Message ID 20170202202923.GC91024@kam.mff.cuni.cz
State New
Headers show

Commit Message

Jan Hubicka Feb. 2, 2017, 8:29 p.m. UTC
Hi,
this patches fixes profile updating in the ifcombine.  This is not hard to do
and ifcombine is #2 profile update offender out of tree passes (#1 is the
vectorizer).

I think this counts as a regression, becuase one can trigger arbitrarily
bad profile after ifconversion and defnitly construct a testcase where this
will cause us optimize for size where we optimized for speed previously.

Bootstrapped/regtested x86_64-linux. Will commit it tomorrow (after testers
pick up the threading fix) unless there are complains.

	* gcc.dg/tree-ssa/ssa-ifcombine-1.c: Check for no profile mismatches.
	* gcc.dg/tree-ssa/ssa-ifcombine-2.c: Check for no profile mismatches.
	* gcc.dg/tree-ssa/ssa-ifcombine-3.c: Check for no profile mismatches.
	* gcc.dg/tree-ssa/ssa-ifcombine-4.c: Check for no profile mismatches.
	* gcc.dg/tree-ssa/ssa-ifcombine-5.c: Check for no profile mismatches.
	* gcc.dg/tree-ssa/ssa-ifcombine-6.c: Check for no profile mismatches.
	* gcc.dg/tree-ssa/ssa-ifcombine-7.c: Check for no profile mismatches.
	* gcc.dg/tree-ssa/ssa-ifcombine-8.c: Check for no profile mismatches.
	* gcc.dg/tree-ssa/ssa-ifcombine-9.c: Check for no profile mismatches.
	* gcc.dg/tree-ssa/ssa-ifcombine-10.c: Check for no profile mismatches.
	* gcc.dg/tree-ssa/ssa-ifcombine-11.c: Check for no profile mismatches.
	* gcc.dg/tree-ssa/ssa-ifcombine-12.c: Check for no profile mismatches.
	* gcc.dg/tree-ssa/ssa-ifcombine-13.c: Check for no profile mismatches.

	* tree-ssa-ifcombine.c (update_profile_after_ifcombine): New function.
	(ifcombine_ifandif): Use it.

Comments

Andrew Pinski Feb. 2, 2017, 9:34 p.m. UTC | #1
On Thu, Feb 2, 2017 at 12:29 PM, Jan Hubicka <hubicka@ucw.cz> wrote:
> Hi,
> this patches fixes profile updating in the ifcombine.  This is not hard to do
> and ifcombine is #2 profile update offender out of tree passes (#1 is the
> vectorizer).
>
> I think this counts as a regression, becuase one can trigger arbitrarily
> bad profile after ifconversion and defnitly construct a testcase where this
> will cause us optimize for size where we optimized for speed previously.


This might be related to PR 78200 but I could be wrong.

Thanks,
Andrew

>
> Bootstrapped/regtested x86_64-linux. Will commit it tomorrow (after testers
> pick up the threading fix) unless there are complains.
>
>         * gcc.dg/tree-ssa/ssa-ifcombine-1.c: Check for no profile mismatches.
>         * gcc.dg/tree-ssa/ssa-ifcombine-2.c: Check for no profile mismatches.
>         * gcc.dg/tree-ssa/ssa-ifcombine-3.c: Check for no profile mismatches.
>         * gcc.dg/tree-ssa/ssa-ifcombine-4.c: Check for no profile mismatches.
>         * gcc.dg/tree-ssa/ssa-ifcombine-5.c: Check for no profile mismatches.
>         * gcc.dg/tree-ssa/ssa-ifcombine-6.c: Check for no profile mismatches.
>         * gcc.dg/tree-ssa/ssa-ifcombine-7.c: Check for no profile mismatches.
>         * gcc.dg/tree-ssa/ssa-ifcombine-8.c: Check for no profile mismatches.
>         * gcc.dg/tree-ssa/ssa-ifcombine-9.c: Check for no profile mismatches.
>         * gcc.dg/tree-ssa/ssa-ifcombine-10.c: Check for no profile mismatches.
>         * gcc.dg/tree-ssa/ssa-ifcombine-11.c: Check for no profile mismatches.
>         * gcc.dg/tree-ssa/ssa-ifcombine-12.c: Check for no profile mismatches.
>         * gcc.dg/tree-ssa/ssa-ifcombine-13.c: Check for no profile mismatches.
>
>         * tree-ssa-ifcombine.c (update_profile_after_ifcombine): New function.
>         (ifcombine_ifandif): Use it.
>
> Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-1.c
> ===================================================================
> --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-1.c (revision 245134)
> +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-1.c (working copy)
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O -fdump-tree-optimized" } */
> +/* { dg-options "-O -fdump-tree-optimized-details-blocks" } */
>
>  /* Testcase for PR31657.  */
>
> @@ -14,3 +14,4 @@ int foo (int x, int a, int b)
>  }
>
>  /* { dg-final { scan-tree-dump "\\|" "optimized" } } */
> +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
> Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-10.c
> ===================================================================
> --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-10.c        (revision 245134)
> +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-10.c        (working copy)
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -fdump-tree-optimized" } */
> +/* { dg-options "-O2 -fdump-tree-optimized-details-blocks" } */
>
>  /* Testcase for PR31657.  */
>
> @@ -17,3 +17,4 @@ int f(int x, int a, int b)
>    return t;
>  }
>  /* { dg-final { scan-tree-dump "& 5" "optimized" } } */
> +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
> Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-11.c
> ===================================================================
> --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-11.c        (revision 245134)
> +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-11.c        (working copy)
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O -fdump-tree-optimized" } */
> +/* { dg-options "-O -fdump-tree-optimized-details-blocks" } */
>
>  /* Testcase for PR31657.  */
>  int g(void);
> @@ -18,3 +18,4 @@ int f(int x, int a, int b)
>  }
>
>  /* { dg-final { scan-tree-dump "& 5" "optimized" } } */
> +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
> Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-12.c
> ===================================================================
> --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-12.c        (revision 245134)
> +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-12.c        (working copy)
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -fno-tree-vrp -fdump-tree-optimized" } */
> +/* { dg-options "-O2 -fno-tree-vrp -fdump-tree-optimized-details-blocks" } */
>
>  /* Testcase for PR31657.  */
>
> @@ -17,3 +17,4 @@ int f(int x, int a, int b)
>    return t;
>  }
>  /* { dg-final { scan-tree-dump "& 5" "optimized" } } */
> +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
> Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-13.c
> ===================================================================
> --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-13.c        (revision 245134)
> +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-13.c        (working copy)
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O1 -fdump-tree-optimized" } */
> +/* { dg-options "-O1 -fdump-tree-optimized-details-blocks" } */
>  /* { dg-additional-options "-mbranch-cost=2" { target { i?86-*-* x86_64-*-* s390*-*-* avr*-*-* } } } */
>
>  _Bool f1(_Bool a, _Bool b)
> @@ -18,3 +18,4 @@ _Bool f1(_Bool a, _Bool b)
>  /* For LOGICAL_OP_NON_SHORT_CIRCUIT, this should be optimized
>     into return a & b;, with no ifs.  */
>  /* { dg-final { scan-tree-dump-not "if" "optimized" { target { i?86-*-* x86_64-*-* s390*-*-* avr*-*-* } } } } */
> +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
> Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-2.c
> ===================================================================
> --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-2.c (revision 245134)
> +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-2.c (working copy)
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O -fdump-tree-optimized" } */
> +/* { dg-options "-O -fdump-tree-optimized-details-blocks" } */
>
>  /* Testcase for PR31657.  */
>
> @@ -20,3 +20,4 @@ doit:
>  }
>
>  /* { dg-final { scan-tree-dump "\\|" "optimized" } } */
> +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
> Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-3.c
> ===================================================================
> --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-3.c (revision 245134)
> +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-3.c (working copy)
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O -fdump-tree-optimized" } */
> +/* { dg-options "-O -fdump-tree-optimized-details-blocks" } */
>
>  /* Testcase extracted from PR15353.  */
>
> @@ -20,3 +20,4 @@ doit:
>  }
>
>  /* { dg-final { scan-tree-dump ">=" "optimized" } } */
> +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
> Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-4.c
> ===================================================================
> --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-4.c (revision 245134)
> +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-4.c (working copy)
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O -fdump-tree-optimized" } */
> +/* { dg-options "-O -fdump-tree-optimized-details-blocks" } */
>
>  /* Testcase extracted from PR15353.  */
>
> @@ -18,3 +18,4 @@ void foo (int x, int a)
>  }
>
>  /* { dg-final { scan-tree-dump "!=" "optimized" } } */
> +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
> Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-5.c
> ===================================================================
> --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-5.c (revision 245134)
> +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-5.c (working copy)
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O -fdump-tree-optimized" } */
> +/* { dg-options "-O -fdump-tree-optimized-details-blocks" } */
>
>  /* Testcase from PR15353.  */
>
> @@ -17,3 +17,4 @@ int f(int *i, int *j)
>  }
>
>  /* { dg-final { scan-tree-dump ">=" "optimized" } } */
> +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
> Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-6.c
> ===================================================================
> --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-6.c (revision 245134)
> +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-6.c (working copy)
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O -fdump-tree-ifcombine" } */
> +/* { dg-options "-O -fdump-tree-ifcombine-details-blocks" } */
>
>  void bar (void);
>
> @@ -34,3 +34,4 @@ foo2 (unsigned int a)
>
>  /* { dg-final { scan-tree-dump "optimizing bits or bits test" "ifcombine" } } */
>  /* { dg-final { scan-tree-dump "optimizing double bit test" "ifcombine" } } */
> +/* { dg-final { scan-tree-dump-not "Invalid sum" "ifcombine" } } */
> Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-7.c
> ===================================================================
> --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-7.c (revision 245134)
> +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-7.c (working copy)
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O -fdump-tree-ifcombine" } */
> +/* { dg-options "-O -fdump-tree-ifcombine-details-blocks" } */
>
>  int test1 (int i, int j)
>  {
> @@ -12,3 +12,4 @@ int test1 (int i, int j)
>  /* The above should be optimized to a i > j test by ifcombine.  */
>
>  /* { dg-final { scan-tree-dump " > " "ifcombine" } } */
> +/* { dg-final { scan-tree-dump-not "Invalid sum" "ifcombine" } } */
> Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-8.c
> ===================================================================
> --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-8.c (revision 245134)
> +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-8.c (working copy)
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O -fno-trapping-math -fdump-tree-ifcombine" } */
> +/* { dg-options "-O -fno-trapping-math -fdump-tree-ifcombine-details-blocks" } */
>
>  double test1 (double i, double j)
>  {
> @@ -22,3 +22,4 @@ plouf:
>     Instead we get u<=, which is acceptable with -fno-trapping-math.  */
>
>  /* { dg-final { scan-tree-dump " u<= " "ifcombine" } } */
> +/* { dg-final { scan-tree-dump-not "Invalid sum" "ifcombine" } } */
> Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-9.c
> ===================================================================
> --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-9.c (revision 245134)
> +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-9.c (working copy)
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -fno-trapping-math -fdump-tree-ifcombine" } */
> +/* { dg-options "-O2 -fno-trapping-math -fdump-tree-ifcombine-details-blocks" } */
>
>  void f ();
>  enum Sign { NEG=-1, ZERO, POS };
> @@ -19,3 +19,4 @@ void g (double x)
>     The transformation would also be legal with -ftrapping-math.  */
>
>  /* { dg-final { scan-tree-dump "optimizing.* < " "ifcombine" } } */
> +/* { dg-final { scan-tree-dump-not "Invalid sum" "ifcombine" } } */
> Index: testsuite/gcc.dg/tree-ssa/threadbackward-1.c
> ===================================================================
> --- testsuite/gcc.dg/tree-ssa/threadbackward-1.c        (revision 0)
> +++ testsuite/gcc.dg/tree-ssa/threadbackward-1.c        (working copy)
> @@ -0,0 +1,9 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-ethread" } */
> +char *c;
> +int t()
> +{
> +  for (int i=0;i<5000;i++)
> +    c[i]=i;
> +}
> +/* { dg-final { scan-tree-dump-times "Registering FSM jump thread" 1 "ethread"} } */
> Index: tree-ssa-ifcombine.c
> ===================================================================
> --- tree-ssa-ifcombine.c        (revision 245134)
> +++ tree-ssa-ifcombine.c        (working copy)
> @@ -332,6 +332,51 @@ recognize_bits_test (gcond *cond, tree *
>    return true;
>  }
>
> +
> +/* Update profile after code in outer_cond_bb was adjuted so
> +   outer_cond_bb has no condition.  */
> +
> +static void
> +update_profile_after_ifcombine (basic_block inner_cond_bb,
> +                               basic_block outer_cond_bb)
> +{
> +  edge outer_to_inner = find_edge (outer_cond_bb, inner_cond_bb);
> +  edge outer2 = (EDGE_SUCC (outer_cond_bb, 0) == outer_to_inner
> +                ? EDGE_SUCC (outer_cond_bb, 1)
> +                : EDGE_SUCC (outer_cond_bb, 0));
> +  edge inner_taken = EDGE_SUCC (inner_cond_bb, 0);
> +  edge inner_not_taken = EDGE_SUCC (inner_cond_bb, 1);
> +
> +  if (inner_taken->dest != outer2->dest)
> +    std::swap (inner_taken, inner_not_taken);
> +  gcc_assert (inner_taken->dest == outer2->dest);
> +
> +  /* In the following we assume that inner_cond_bb has single predecessor.  */
> +  gcc_assert (single_pred_p (inner_cond_bb));
> +
> +  /* Path outer_cond_bb->(outer2) needs to be merged into path
> +     outer_cond_bb->(outer_to_inner)->inner_cond_bb->(inner_taken)
> +     and probability of inner_not_taken updated.  */
> +
> +  outer_to_inner->count = outer_cond_bb->count;
> +  inner_cond_bb->count = outer_cond_bb->count;
> +  inner_taken->count += outer2->count;
> +  outer2->count = 0;
> +
> +  inner_taken->probability = outer2->probability
> +                            + RDIV (outer_to_inner->probability
> +                                    * inner_taken->probability,
> +                                    REG_BR_PROB_BASE);
> +  if (inner_taken->probability > REG_BR_PROB_BASE)
> +    inner_taken->probability = REG_BR_PROB_BASE;
> +  inner_not_taken->probability = REG_BR_PROB_BASE
> +                                - inner_taken->probability;
> +
> +  outer_to_inner->probability = REG_BR_PROB_BASE;
> +  inner_cond_bb->frequency = outer_cond_bb->frequency;
> +  outer2->probability = 0;
> +}
> +
>  /* If-convert on a and pattern with a common else block.  The inner
>     if is specified by its INNER_COND_BB, the outer by OUTER_COND_BB.
>     inner_inv, outer_inv and result_inv indicate whether the conditions
> @@ -394,6 +439,7 @@ ifcombine_ifandif (basic_block inner_con
>         outer_inv ? boolean_false_node : boolean_true_node);
>        update_stmt (outer_cond);
> +      update_profile_after_ifcombine (inner_cond_bb, outer_cond_bb);
>
>        if (dump_file)
>         {
>           fprintf (dump_file, "optimizing double bit test to ");
> @@ -471,6 +518,7 @@ ifcombine_ifandif (basic_block inner_con
>        gimple_cond_set_condition_from_tree (outer_cond,
>         outer_inv ? boolean_false_node : boolean_true_node);
>        update_stmt (outer_cond);
> +      update_profile_after_ifcombine (inner_cond_bb, outer_cond_bb);
>
>        if (dump_file)
>         {
> @@ -554,6 +602,7 @@ ifcombine_ifandif (basic_block inner_con
>        gimple_cond_set_condition_from_tree (outer_cond,
>         outer_inv ? boolean_false_node : boolean_true_node);
>        update_stmt (outer_cond);
> +      update_profile_after_ifcombine (inner_cond_bb, outer_cond_bb);
>
>        if (dump_file)
>         {
Jeff Law Feb. 3, 2017, 7:56 a.m. UTC | #2
On 02/02/2017 01:29 PM, Jan Hubicka wrote:
> Hi,
> this patches fixes profile updating in the ifcombine.  This is not hard to do
> and ifcombine is #2 profile update offender out of tree passes (#1 is the
> vectorizer).
>
> I think this counts as a regression, becuase one can trigger arbitrarily
> bad profile after ifconversion and defnitly construct a testcase where this
> will cause us optimize for size where we optimized for speed previously.
>
> Bootstrapped/regtested x86_64-linux. Will commit it tomorrow (after testers
> pick up the threading fix) unless there are complains.
>
> 	* gcc.dg/tree-ssa/ssa-ifcombine-1.c: Check for no profile mismatches.
> 	* gcc.dg/tree-ssa/ssa-ifcombine-2.c: Check for no profile mismatches.
> 	* gcc.dg/tree-ssa/ssa-ifcombine-3.c: Check for no profile mismatches.
> 	* gcc.dg/tree-ssa/ssa-ifcombine-4.c: Check for no profile mismatches.
> 	* gcc.dg/tree-ssa/ssa-ifcombine-5.c: Check for no profile mismatches.
> 	* gcc.dg/tree-ssa/ssa-ifcombine-6.c: Check for no profile mismatches.
> 	* gcc.dg/tree-ssa/ssa-ifcombine-7.c: Check for no profile mismatches.
> 	* gcc.dg/tree-ssa/ssa-ifcombine-8.c: Check for no profile mismatches.
> 	* gcc.dg/tree-ssa/ssa-ifcombine-9.c: Check for no profile mismatches.
> 	* gcc.dg/tree-ssa/ssa-ifcombine-10.c: Check for no profile mismatches.
> 	* gcc.dg/tree-ssa/ssa-ifcombine-11.c: Check for no profile mismatches.
> 	* gcc.dg/tree-ssa/ssa-ifcombine-12.c: Check for no profile mismatches.
> 	* gcc.dg/tree-ssa/ssa-ifcombine-13.c: Check for no profile mismatches.
>
> 	* tree-ssa-ifcombine.c (update_profile_after_ifcombine): New function.
> 	(ifcombine_ifandif): Use it.
>
> Index: testsuite/gcc.dg/tree-ssa/threadbackward-1.c
> ===================================================================
> --- testsuite/gcc.dg/tree-ssa/threadbackward-1.c	(revision 0)
> +++ testsuite/gcc.dg/tree-ssa/threadbackward-1.c	(working copy)
> @@ -0,0 +1,9 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-ethread" } */
> +char *c;
> +int t()
> +{
> +  for (int i=0;i<5000;i++)
> +    c[i]=i;
> +}
> +/* { dg-final { scan-tree-dump-times "Registering FSM jump thread" 1 "ethread"} } */
Seems like this is unrelated to fixing profile updates in ifcombine.  If 
you want the new test it's probably best done in another independent 
patch :-)


> Index: tree-ssa-ifcombine.c
> ===================================================================
> --- tree-ssa-ifcombine.c	(revision 245134)
> +++ tree-ssa-ifcombine.c	(working copy)
> @@ -332,6 +332,51 @@ recognize_bits_test (gcond *cond, tree *
>    return true;
>  }
>
> +
> +/* Update profile after code in outer_cond_bb was adjuted so
s/adjuted/adjusted/

OK with the nit fixed.

jeff
Christophe Lyon Feb. 6, 2017, 2:28 p.m. UTC | #3
Hi,


On 3 February 2017 at 08:56, Jeff Law <law@redhat.com> wrote:
> On 02/02/2017 01:29 PM, Jan Hubicka wrote:
>>
>> Hi,
>> this patches fixes profile updating in the ifcombine.  This is not hard to
>> do
>> and ifcombine is #2 profile update offender out of tree passes (#1 is the
>> vectorizer).
>>
>> I think this counts as a regression, becuase one can trigger arbitrarily
>> bad profile after ifconversion and defnitly construct a testcase where
>> this
>> will cause us optimize for size where we optimized for speed previously.
>>
>> Bootstrapped/regtested x86_64-linux. Will commit it tomorrow (after
>> testers
>> pick up the threading fix) unless there are complains.
>>
>>         * gcc.dg/tree-ssa/ssa-ifcombine-1.c: Check for no profile
>> mismatches.
>>         * gcc.dg/tree-ssa/ssa-ifcombine-2.c: Check for no profile
>> mismatches.
>>         * gcc.dg/tree-ssa/ssa-ifcombine-3.c: Check for no profile
>> mismatches.
>>         * gcc.dg/tree-ssa/ssa-ifcombine-4.c: Check for no profile
>> mismatches.
>>         * gcc.dg/tree-ssa/ssa-ifcombine-5.c: Check for no profile
>> mismatches.
>>         * gcc.dg/tree-ssa/ssa-ifcombine-6.c: Check for no profile
>> mismatches.
>>         * gcc.dg/tree-ssa/ssa-ifcombine-7.c: Check for no profile
>> mismatches.
>>         * gcc.dg/tree-ssa/ssa-ifcombine-8.c: Check for no profile
>> mismatches.
>>         * gcc.dg/tree-ssa/ssa-ifcombine-9.c: Check for no profile
>> mismatches.
>>         * gcc.dg/tree-ssa/ssa-ifcombine-10.c: Check for no profile
>> mismatches.
>>         * gcc.dg/tree-ssa/ssa-ifcombine-11.c: Check for no profile
>> mismatches.
>>         * gcc.dg/tree-ssa/ssa-ifcombine-12.c: Check for no profile
>> mismatches.
>>         * gcc.dg/tree-ssa/ssa-ifcombine-13.c: Check for no profile
>> mismatches.
>>
>>         * tree-ssa-ifcombine.c (update_profile_after_ifcombine): New
>> function.
>>         (ifcombine_ifandif): Use it.
>>
>> Index: testsuite/gcc.dg/tree-ssa/threadbackward-1.c
>> ===================================================================
>> --- testsuite/gcc.dg/tree-ssa/threadbackward-1.c        (revision 0)
>> +++ testsuite/gcc.dg/tree-ssa/threadbackward-1.c        (working copy)
>> @@ -0,0 +1,9 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -fdump-tree-ethread" } */
>> +char *c;
>> +int t()
>> +{
>> +  for (int i=0;i<5000;i++)
>> +    c[i]=i;
>> +}
>> +/* { dg-final { scan-tree-dump-times "Registering FSM jump thread" 1
>> "ethread"} } */
>
> Seems like this is unrelated to fixing profile updates in ifcombine.  If you
> want the new test it's probably best done in another independent patch :-)
>
>
>> Index: tree-ssa-ifcombine.c
>> ===================================================================
>> --- tree-ssa-ifcombine.c        (revision 245134)
>> +++ tree-ssa-ifcombine.c        (working copy)
>> @@ -332,6 +332,51 @@ recognize_bits_test (gcond *cond, tree *
>>    return true;
>>  }
>>
>> +
>> +/* Update profile after code in outer_cond_bb was adjuted so
>
> s/adjuted/adjusted/
>
> OK with the nit fixed.
>
> jeff
>

After this patch (r245151), I've noticed regressions on aarch64:
  gcc.target/aarch64/test_frame_1.c scan-assembler-times ldr\tx30,
\\[sp\\], [0-9]+ 2
  gcc.target/aarch64/test_frame_2.c scan-assembler-times ldp\tx19,
x30, \\[sp\\], [0-9]+ 1
  gcc.target/aarch64/test_frame_4.c scan-assembler-times ldp\tx19,
x30, \\[sp\\], [0-9]+ 1
  gcc.target/aarch64/test_frame_6.c scan-assembler-times ldr\tx30, \\[sp\\] 2
  gcc.target/aarch64/test_frame_7.c scan-assembler-times ldp\tx19,
x30, \\[sp\\] 1

now FAIL.

For instance in test_frame_1.c, we used to generate:
    ccmp    w2, w1, 0, eq
    bne    .L6
    ldrb    w0, [sp, 123]
    ldrb    w1, [sp, 124]
    cmp    w0, 99
    ccmp    w1, w0, 0, eq
    cset    w0, eq
.L6:
    ldr    x30, [sp], 224
    ret

and now:
    ccmp    w2, w1, 0, eq
    beq    .L11
    ldr    x30, [sp], 224
    ret
    .p2align 3
.L11:
    ldrb    w0, [sp, 123]
    ldrb    w1, [sp, 124]
    cmp    w0, 99
    ldr    x30, [sp], 224
    ccmp    w1, w0, 0, eq
    cset    w0, eq
    ret

which is 2 instructions more, as the control flow is less efficient.

Do we want to just update the tests (increasing the number of expected
str/ldr/stp/ldp to match the new code generation), or do we
consider this a regression caused by this patch?

Christophe
Jan Hubicka Feb. 6, 2017, 3:26 p.m. UTC | #4
> 
> After this patch (r245151), I've noticed regressions on aarch64:
>   gcc.target/aarch64/test_frame_1.c scan-assembler-times ldr\tx30,
> \\[sp\\], [0-9]+ 2
>   gcc.target/aarch64/test_frame_2.c scan-assembler-times ldp\tx19,
> x30, \\[sp\\], [0-9]+ 1
>   gcc.target/aarch64/test_frame_4.c scan-assembler-times ldp\tx19,
> x30, \\[sp\\], [0-9]+ 1
>   gcc.target/aarch64/test_frame_6.c scan-assembler-times ldr\tx30, \\[sp\\] 2
>   gcc.target/aarch64/test_frame_7.c scan-assembler-times ldp\tx19,
> x30, \\[sp\\] 1
> 
> now FAIL.
> 
> For instance in test_frame_1.c, we used to generate:
>     ccmp    w2, w1, 0, eq
>     bne    .L6
>     ldrb    w0, [sp, 123]
>     ldrb    w1, [sp, 124]
>     cmp    w0, 99
>     ccmp    w1, w0, 0, eq
>     cset    w0, eq
> .L6:
>     ldr    x30, [sp], 224
>     ret
> 
> and now:
>     ccmp    w2, w1, 0, eq
>     beq    .L11
>     ldr    x30, [sp], 224
>     ret
>     .p2align 3
> .L11:
>     ldrb    w0, [sp, 123]
>     ldrb    w1, [sp, 124]
>     cmp    w0, 99
>     ldr    x30, [sp], 224
>     ccmp    w1, w0, 0, eq
>     cset    w0, eq
>     ret
> 
> which is 2 instructions more, as the control flow is less efficient.
> 
> Do we want to just update the tests (increasing the number of expected
> str/ldr/stp/ldp to match the new code generation), or do we
> consider this a regression caused by this patch?

I think it is not a regression, just the testcase if fragile and depends on outcome
of ifcombine.  It seems it was updated several time in the past. I am not quite
sure what the test is testing, but probably just updating the testcase and/or
disabling the ifconvert pass for it is the right answer.

Honza
Jiong Wang Feb. 6, 2017, 3:54 p.m. UTC | #5
On 06/02/17 15:26, Jan Hubicka wrote:
> I think it is not a regression, just the testcase if fragile and depends on outcome
> of ifcombine.  It seems it was updated several time in the past. I am not quite
> sure what the test is testing.

They are tring to make sure optimal stack adjustment decisions are made.

Fix the testcases by disabling relevant transformation passes looks one way to
me.  The other way, might be more reliable, is we dump the decisions made during
aarch64 frame layout if dump_file be true, and prefix the dump entry by function
name to make it easier caught by dejagnu.  We then scan rtl dump instead of
instructions.
Richard Earnshaw (lists) Feb. 6, 2017, 4:56 p.m. UTC | #6
On 06/02/17 15:54, Jiong Wang wrote:
> On 06/02/17 15:26, Jan Hubicka wrote:
>> I think it is not a regression, just the testcase if fragile and
>> depends on outcome
>> of ifcombine.  It seems it was updated several time in the past. I am
>> not quite
>> sure what the test is testing.
> 
> They are tring to make sure optimal stack adjustment decisions are made.
> 
> Fix the testcases by disabling relevant transformation passes looks one
> way to
> me.  The other way, might be more reliable, is we dump the decisions
> made during
> aarch64 frame layout if dump_file be true, and prefix the dump entry by
> function
> name to make it easier caught by dejagnu.  We then scan rtl dump instead of
> instructions.
>  
> 

We only care that the epilogue instructions appear at least once.  So
for the epilogue we should probably just adjust the tests to use
scan-assembler, rather than scan-assembler-times.

R.
diff mbox

Patch

Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-1.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-1.c	(revision 245134)
+++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-1.c	(working copy)
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O -fdump-tree-optimized" } */
+/* { dg-options "-O -fdump-tree-optimized-details-blocks" } */
 
 /* Testcase for PR31657.  */
 
@@ -14,3 +14,4 @@  int foo (int x, int a, int b)
 }
 
 /* { dg-final { scan-tree-dump "\\|" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-10.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-10.c	(revision 245134)
+++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-10.c	(working copy)
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fdump-tree-optimized-details-blocks" } */
 
 /* Testcase for PR31657.  */
 
@@ -17,3 +17,4 @@  int f(int x, int a, int b)
   return t;
 }
 /* { dg-final { scan-tree-dump "& 5" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-11.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-11.c	(revision 245134)
+++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-11.c	(working copy)
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O -fdump-tree-optimized" } */
+/* { dg-options "-O -fdump-tree-optimized-details-blocks" } */
 
 /* Testcase for PR31657.  */
 int g(void);
@@ -18,3 +18,4 @@  int f(int x, int a, int b)
 }
 
 /* { dg-final { scan-tree-dump "& 5" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-12.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-12.c	(revision 245134)
+++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-12.c	(working copy)
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -fno-tree-vrp -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fno-tree-vrp -fdump-tree-optimized-details-blocks" } */
 
 /* Testcase for PR31657.  */
 
@@ -17,3 +17,4 @@  int f(int x, int a, int b)
   return t;
 }
 /* { dg-final { scan-tree-dump "& 5" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-13.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-13.c	(revision 245134)
+++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-13.c	(working copy)
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O1 -fdump-tree-optimized" } */
+/* { dg-options "-O1 -fdump-tree-optimized-details-blocks" } */
 /* { dg-additional-options "-mbranch-cost=2" { target { i?86-*-* x86_64-*-* s390*-*-* avr*-*-* } } } */
 
 _Bool f1(_Bool a, _Bool b)
@@ -18,3 +18,4 @@  _Bool f1(_Bool a, _Bool b)
 /* For LOGICAL_OP_NON_SHORT_CIRCUIT, this should be optimized
    into return a & b;, with no ifs.  */
 /* { dg-final { scan-tree-dump-not "if" "optimized" { target { i?86-*-* x86_64-*-* s390*-*-* avr*-*-* } } } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-2.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-2.c	(revision 245134)
+++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-2.c	(working copy)
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O -fdump-tree-optimized" } */
+/* { dg-options "-O -fdump-tree-optimized-details-blocks" } */
 
 /* Testcase for PR31657.  */
 
@@ -20,3 +20,4 @@  doit:
 }
 
 /* { dg-final { scan-tree-dump "\\|" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-3.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-3.c	(revision 245134)
+++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-3.c	(working copy)
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O -fdump-tree-optimized" } */
+/* { dg-options "-O -fdump-tree-optimized-details-blocks" } */
 
 /* Testcase extracted from PR15353.  */
 
@@ -20,3 +20,4 @@  doit:
 }
 
 /* { dg-final { scan-tree-dump ">=" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-4.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-4.c	(revision 245134)
+++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-4.c	(working copy)
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O -fdump-tree-optimized" } */
+/* { dg-options "-O -fdump-tree-optimized-details-blocks" } */
 
 /* Testcase extracted from PR15353.  */
 
@@ -18,3 +18,4 @@  void foo (int x, int a)
 }
 
 /* { dg-final { scan-tree-dump "!=" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-5.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-5.c	(revision 245134)
+++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-5.c	(working copy)
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O -fdump-tree-optimized" } */
+/* { dg-options "-O -fdump-tree-optimized-details-blocks" } */
 
 /* Testcase from PR15353.  */
 
@@ -17,3 +17,4 @@  int f(int *i, int *j)
 }
 
 /* { dg-final { scan-tree-dump ">=" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-6.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-6.c	(revision 245134)
+++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-6.c	(working copy)
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O -fdump-tree-ifcombine" } */
+/* { dg-options "-O -fdump-tree-ifcombine-details-blocks" } */
 
 void bar (void);
 
@@ -34,3 +34,4 @@  foo2 (unsigned int a)
 
 /* { dg-final { scan-tree-dump "optimizing bits or bits test" "ifcombine" } } */
 /* { dg-final { scan-tree-dump "optimizing double bit test" "ifcombine" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "ifcombine" } } */
Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-7.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-7.c	(revision 245134)
+++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-7.c	(working copy)
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O -fdump-tree-ifcombine" } */
+/* { dg-options "-O -fdump-tree-ifcombine-details-blocks" } */
 
 int test1 (int i, int j)
 {
@@ -12,3 +12,4 @@  int test1 (int i, int j)
 /* The above should be optimized to a i > j test by ifcombine.  */
 
 /* { dg-final { scan-tree-dump " > " "ifcombine" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "ifcombine" } } */
Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-8.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-8.c	(revision 245134)
+++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-8.c	(working copy)
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O -fno-trapping-math -fdump-tree-ifcombine" } */
+/* { dg-options "-O -fno-trapping-math -fdump-tree-ifcombine-details-blocks" } */
 
 double test1 (double i, double j)
 {
@@ -22,3 +22,4 @@  plouf:
    Instead we get u<=, which is acceptable with -fno-trapping-math.  */
 
 /* { dg-final { scan-tree-dump " u<= " "ifcombine" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "ifcombine" } } */
Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-9.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-9.c	(revision 245134)
+++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-9.c	(working copy)
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -fno-trapping-math -fdump-tree-ifcombine" } */
+/* { dg-options "-O2 -fno-trapping-math -fdump-tree-ifcombine-details-blocks" } */
 
 void f ();
 enum Sign { NEG=-1, ZERO, POS };
@@ -19,3 +19,4 @@  void g (double x)
    The transformation would also be legal with -ftrapping-math.  */
 
 /* { dg-final { scan-tree-dump "optimizing.* < " "ifcombine" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "ifcombine" } } */
Index: testsuite/gcc.dg/tree-ssa/threadbackward-1.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/threadbackward-1.c	(revision 0)
+++ testsuite/gcc.dg/tree-ssa/threadbackward-1.c	(working copy)
@@ -0,0 +1,9 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ethread" } */
+char *c;
+int t()
+{
+  for (int i=0;i<5000;i++)
+    c[i]=i;
+}
+/* { dg-final { scan-tree-dump-times "Registering FSM jump thread" 1 "ethread"} } */
Index: tree-ssa-ifcombine.c
===================================================================
--- tree-ssa-ifcombine.c	(revision 245134)
+++ tree-ssa-ifcombine.c	(working copy)
@@ -332,6 +332,51 @@  recognize_bits_test (gcond *cond, tree *
   return true;
 }
 
+
+/* Update profile after code in outer_cond_bb was adjuted so
+   outer_cond_bb has no condition.  */
+
+static void
+update_profile_after_ifcombine (basic_block inner_cond_bb,
+			        basic_block outer_cond_bb)
+{
+  edge outer_to_inner = find_edge (outer_cond_bb, inner_cond_bb);
+  edge outer2 = (EDGE_SUCC (outer_cond_bb, 0) == outer_to_inner
+		 ? EDGE_SUCC (outer_cond_bb, 1)
+		 : EDGE_SUCC (outer_cond_bb, 0));
+  edge inner_taken = EDGE_SUCC (inner_cond_bb, 0);
+  edge inner_not_taken = EDGE_SUCC (inner_cond_bb, 1);
+  
+  if (inner_taken->dest != outer2->dest)
+    std::swap (inner_taken, inner_not_taken);
+  gcc_assert (inner_taken->dest == outer2->dest);
+
+  /* In the following we assume that inner_cond_bb has single predecessor.  */
+  gcc_assert (single_pred_p (inner_cond_bb));
+
+  /* Path outer_cond_bb->(outer2) needs to be merged into path
+     outer_cond_bb->(outer_to_inner)->inner_cond_bb->(inner_taken)
+     and probability of inner_not_taken updated.  */
+
+  outer_to_inner->count = outer_cond_bb->count;
+  inner_cond_bb->count = outer_cond_bb->count;
+  inner_taken->count += outer2->count;
+  outer2->count = 0;
+
+  inner_taken->probability = outer2->probability
+			     + RDIV (outer_to_inner->probability
+				     * inner_taken->probability,
+				     REG_BR_PROB_BASE);
+  if (inner_taken->probability > REG_BR_PROB_BASE)
+    inner_taken->probability = REG_BR_PROB_BASE;
+  inner_not_taken->probability = REG_BR_PROB_BASE
+				 - inner_taken->probability;
+
+  outer_to_inner->probability = REG_BR_PROB_BASE;
+  inner_cond_bb->frequency = outer_cond_bb->frequency;
+  outer2->probability = 0;
+}
+
 /* If-convert on a and pattern with a common else block.  The inner
    if is specified by its INNER_COND_BB, the outer by OUTER_COND_BB.
    inner_inv, outer_inv and result_inv indicate whether the conditions
@@ -394,6 +439,7 @@  ifcombine_ifandif (basic_block inner_con
 	outer_inv ? boolean_false_node : boolean_true_node);
       update_stmt (outer_cond);
+      update_profile_after_ifcombine (inner_cond_bb, outer_cond_bb);
 
       if (dump_file)
 	{
 	  fprintf (dump_file, "optimizing double bit test to ");
@@ -471,6 +518,7 @@  ifcombine_ifandif (basic_block inner_con
       gimple_cond_set_condition_from_tree (outer_cond,
 	outer_inv ? boolean_false_node : boolean_true_node);
       update_stmt (outer_cond);
+      update_profile_after_ifcombine (inner_cond_bb, outer_cond_bb);
 
       if (dump_file)
 	{
@@ -554,6 +602,7 @@  ifcombine_ifandif (basic_block inner_con
       gimple_cond_set_condition_from_tree (outer_cond,
 	outer_inv ? boolean_false_node : boolean_true_node);
       update_stmt (outer_cond);
+      update_profile_after_ifcombine (inner_cond_bb, outer_cond_bb);
 
       if (dump_file)
 	{