diff mbox

Fix PR49642 in 4.6, questions about 4.7

Message ID 1326203006.13203.10.camel@gnopaine
State New
Headers show

Commit Message

Bill Schmidt Jan. 10, 2012, 1:43 p.m. UTC
Greetings,

This patch follows Richard Guenther's suggestion of 2011-07-05 in
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49642 to fix the problem in
gcc 4.6.  It prevents choosing a function split point that is dominated
by a builtin call to __builtin_constant_p.

The bug was marked fixed in 4.7 since the extra FRE pass allows the
correct optimization to be done even in the presence of
__builtin_constant_p.  However, 4.7 still fails in the presence of
-fno-tree-fre.  I think we should probably include a variation of this
patch in 4.7 that only kicks in when FRE has been disabled at the
command line.  The test case would also be modified slightly to include
-fno-tree-fre in the dg-compile statement.  Thoughts?

The 4.6 patch was bootstrapped and tests cleanly on powerpc64-linux-gnu.
OK for 4.6 branch?

Thanks,
Bill


gcc:

2012-01-10  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

        PR tree-optimization/49642
        * ipa-split.c (forbidden_dominators): New variable.
        (check_forbidden_calls): New function.
        (dominated_by_forbidden): Likewise.
        (consider_split): Check for forbidden calls.
        (execute_split_functions): Initialize and free forbidden
        dominators info; call check_forbidden_calls.

gcc/testsuite:

2012-01-10  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

        PR tree-optimization/49642
        * gcc.dg/tree-ssa/pr49642.c: New test.

Comments

Richard Biener Jan. 10, 2012, 1:53 p.m. UTC | #1
On Tue, Jan 10, 2012 at 2:43 PM, William J. Schmidt
<wschmidt@linux.vnet.ibm.com> wrote:
> Greetings,
>
> This patch follows Richard Guenther's suggestion of 2011-07-05 in
> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49642 to fix the problem in
> gcc 4.6.  It prevents choosing a function split point that is dominated
> by a builtin call to __builtin_constant_p.
>
> The bug was marked fixed in 4.7 since the extra FRE pass allows the
> correct optimization to be done even in the presence of
> __builtin_constant_p.  However, 4.7 still fails in the presence of
> -fno-tree-fre.  I think we should probably include a variation of this
> patch in 4.7 that only kicks in when FRE has been disabled at the
> command line.  The test case would also be modified slightly to include
> -fno-tree-fre in the dg-compile statement.  Thoughts?

I think it should unconditionally restrict splitting (I suppose on the
trunk the __builtin_constant_p is optimized away already).

Btw, this will also disqualify any point below

 if (__builtin_constant_p (...))
   {
     ...
   }

because after the if join all BBs are dominated by the __builtin_constant_p
call.  What we want to disallow is splitting at a block that is dominated
by the true edge of the condition fed by the __builtin_constant_p result ...

Honza?

> The 4.6 patch was bootstrapped and tests cleanly on powerpc64-linux-gnu.
> OK for 4.6 branch?
>
> Thanks,
> Bill
>
>
> gcc:
>
> 2012-01-10  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
>
>        PR tree-optimization/49642
>        * ipa-split.c (forbidden_dominators): New variable.
>        (check_forbidden_calls): New function.
>        (dominated_by_forbidden): Likewise.
>        (consider_split): Check for forbidden calls.
>        (execute_split_functions): Initialize and free forbidden
>        dominators info; call check_forbidden_calls.
>
> gcc/testsuite:
>
> 2012-01-10  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
>
>        PR tree-optimization/49642
>        * gcc.dg/tree-ssa/pr49642.c: New test.
>
>
> Index: gcc/testsuite/gcc.dg/tree-ssa/pr49642.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/pr49642.c     (revision 0)
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr49642.c     (revision 0)
> @@ -0,0 +1,49 @@
> +/* Verify that ipa-split is disabled following __builtin_constant_p.  */
> +
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +typedef unsigned int u32;
> +typedef unsigned long long u64;
> +
> +static inline __attribute__((always_inline)) __attribute__((const))
> +int __ilog2_u32(u32 n)
> +{
> + int bit;
> + asm ("cntlzw %0,%1" : "=r" (bit) : "r" (n));
> + return 31 - bit;
> +}
> +
> +
> +static inline __attribute__((always_inline)) __attribute__((const))
> +int __ilog2_u64(u64 n)
> +{
> + int bit;
> + asm ("cntlzd %0,%1" : "=r" (bit) : "r" (n));
> + return 63 - bit;
> +}
> +
> +
> +
> +static u64 ehca_map_vaddr(void *caddr);
> +
> +struct ehca_shca {
> +        u32 hca_cap_mr_pgsize;
> +};
> +
> +static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca)
> +{
> + return 1UL << ( __builtin_constant_p(shca->hca_cap_mr_pgsize) ? ( (shca->hca_cap_mr_pgsize) < 1 ? ____ilog2_NaN() : (shca->hca_cap_mr_pgsize) & (1ULL << 63) ? 63 : (shca->hca_cap_mr_pgsize) & (1ULL << 62) ? 62 : (shca->hca_cap_mr_pgsize) & (1ULL << 61) ? 61 : (shca->hca_cap_mr_pgsize) & (1ULL << 60) ? 60 : (shca->hca_cap_mr_pgsize) & (1ULL << 59) ? 59 : (shca->hca_cap_mr_pgsize) & (1ULL << 58) ? 58 : (shca->hca_cap_mr_pgsize) & (1ULL << 57) ? 57 : (shca->hca_cap_mr_pgsize) & (1ULL << 56) ? 56 : (shca->hca_cap_mr_pgsize) & (1ULL << 55) ? 55 : (shca->hca_cap_mr_pgsize) & (1ULL << 54) ? 54 : (shca->hca_cap_mr_pgsize) & (1ULL << 53) ? 53 : (shca->hca_cap_mr_pgsize) & (1ULL << 52) ? 52 : (shca->hca_cap_mr_pgsize) & (1ULL << 51) ? 51 : (shca->hca_cap_mr_pgsize) & (1ULL << 50) ? 50 : (shca->hca_cap_mr_pgsize) & (1ULL << 49) ? 49 : (shca->hca_cap_mr_pgsize) & (1ULL << 48) ? 48 : (shca->hca_cap_mr_pgsize) & (1ULL << 47) ? 47 : (shca->hca_cap_mr_pgsize) & (1ULL << 46) ? 46 : (shca->hca_cap_mr_pgsize) & (1ULL << 45) ? 45 : (shca->hca_cap_mr_pgsize) & (1ULL << 44) ? 44 : (shca->hca_cap_mr_pgsize) & (1ULL << 43) ? 43 : (shca->hca_cap_mr_pgsize) & (1ULL << 42) ? 42 : (shca->hca_cap_mr_pgsize) & (1ULL << 41) ? 41 : (shca->hca_cap_mr_pgsize) & (1ULL << 40) ? 40 : (shca->hca_cap_mr_pgsize) & (1ULL << 39) ? 39 : (shca->hca_cap_mr_pgsize) & (1ULL << 38) ? 38 : (shca->hca_cap_mr_pgsize) & (1ULL << 37) ? 37 : (shca->hca_cap_mr_pgsize) & (1ULL << 36) ? 36 : (shca->hca_cap_mr_pgsize) & (1ULL << 35) ? 35 : (shca->hca_cap_mr_pgsize) & (1ULL << 34) ? 34 : (shca->hca_cap_mr_pgsize) & (1ULL << 33) ? 33 : (shca->hca_cap_mr_pgsize) & (1ULL << 32) ? 32 : (shca->hca_cap_mr_pgsize) & (1ULL << 31) ? 31 : (shca->hca_cap_mr_pgsize) & (1ULL << 30) ? 30 : (shca->hca_cap_mr_pgsize) & (1ULL << 29) ? 29 : (shca->hca_cap_mr_pgsize) & (1ULL << 28) ? 28 : (shca->hca_cap_mr_pgsize) & (1ULL << 27) ? 27 : (shca->hca_cap_mr_pgsize) & (1ULL << 26) ? 26 : (shca->hca_cap_mr_pgsize) & (1ULL << 25) ? 
25 : (shca->hca_cap_mr_pgsize) & (1ULL << 24) ? 24 : (shca->hca_cap_mr_pgsize) & (1ULL << 23) ? 23 : (shca->hca_cap_mr_pgsize) & (1ULL << 22) ? 22 : (shca->hca_cap_mr_pgsize) & (1ULL << 21) ? 21 : (shca->hca_cap_mr_pgsize) & (1ULL << 20) ? 20 : (shca->hca_cap_mr_pgsize) & (1ULL << 19) ? 19 : (shca->hca_cap_mr_pgsize) & (1ULL << 18) ? 18 : (shca->hca_cap_mr_pgsize) & (1ULL << 17) ? 17 : (shca->hca_cap_mr_pgsize) & (1ULL << 16) ? 16 : (shca->hca_cap_mr_pgsize) & (1ULL << 15) ? 15 : (shca->hca_cap_mr_pgsize) & (1ULL << 14) ? 14 : (shca->hca_cap_mr_pgsize) & (1ULL << 13) ? 13 : (shca->hca_cap_mr_pgsize) & (1ULL << 12) ? 12 : (shca->hca_cap_mr_pgsize) & (1ULL << 11) ? 11 : (shca->hca_cap_mr_pgsize) & (1ULL << 10) ? 10 : (shca->hca_cap_mr_pgsize) & (1ULL << 9) ? 9 : (shca->hca_cap_mr_pgsize) & (1ULL << 8) ? 8 : (shca->hca_cap_mr_pgsize) & (1ULL << 7) ? 7 : (shca->hca_cap_mr_pgsize) & (1ULL << 6) ? 6 : (shca->hca_cap_mr_pgsize) & (1ULL << 5) ? 5 : (shca->hca_cap_mr_pgsize) & (1ULL << 4) ? 4 : (shca->hca_cap_mr_pgsize) & (1ULL << 3) ? 3 : (shca->hca_cap_mr_pgsize) & (1ULL << 2) ? 2 : (shca->hca_cap_mr_pgsize) & (1ULL << 1) ? 1 : (shca->hca_cap_mr_pgsize) & (1ULL << 0) ? 0 : ____ilog2_NaN() ) : (sizeof(shca->hca_cap_mr_pgsize) <= 4) ? __ilog2_u32(shca->hca_cap_mr_pgsize) : __ilog2_u64(shca->hca_cap_mr_pgsize) );
> +}
> +
> +int x(struct ehca_shca *shca) {
> +        return ehca_get_max_hwpage_size(shca);
> +}
> +
> +int y(struct ehca_shca *shca)
> +{
> +        return ehca_get_max_hwpage_size(shca);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "____ilog2_NaN" 0 "optimized" } } */
> +/* { dg-final { cleanup-tree-dump "optimized" } } */
> Index: gcc/ipa-split.c
> ===================================================================
> --- gcc/ipa-split.c     (revision 183033)
> +++ gcc/ipa-split.c     (working copy)
> @@ -130,6 +130,10 @@ struct split_point
>
>  struct split_point best_split_point;
>
> +/* Set of basic blocks that are not allowed to dominate a split point.  */
> +
> +bitmap forbidden_dominators;
> +
>  static tree find_retval (basic_block return_bb);
>
>  /* Callback for walk_stmt_load_store_addr_ops.  If T is non-SSA automatic
> @@ -270,6 +274,49 @@ done:
>   return ok;
>  }
>
> +/* If STMT is a call, check the callee against a list of forbidden
> +   functions.  If a match is found, set the bit for block BB in the
> +   forbidden dominators bitmap.  The purpose of this is to avoid
> +   selecting a split point where we are likely to lose the chance
> +   to optimize away an unused function call.  */
> +
> +static void
> +check_forbidden_calls (gimple stmt, basic_block bb)
> +{
> +  tree fndecl;
> +
> +  if (!is_gimple_call (stmt))
> +    return;
> +
> +  fndecl = gimple_call_fndecl (stmt);
> +
> +  if (fndecl
> +      && TREE_CODE (fndecl) == FUNCTION_DECL
> +      && DECL_BUILT_IN (fndecl)
> +      /* At the moment, __builtin_constant_p is the only forbidden
> +        function call (see PR49642).  */
> +      && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CONSTANT_P)
> +    bitmap_set_bit (forbidden_dominators, bb->index);
> +}
> +
> +/* If BB is dominated by any block in the forbidden dominators set,
> +   return TRUE; else FALSE.  */
> +
> +static bool
> +dominated_by_forbidden (basic_block bb)
> +{
> +  unsigned dom_bb;
> +  bitmap_iterator bi;
> +
> +  EXECUTE_IF_SET_IN_BITMAP (forbidden_dominators, 1, dom_bb, bi)
> +    {
> +      if (dominated_by_p (CDI_DOMINATORS, bb, BASIC_BLOCK (dom_bb)))
> +       return true;
> +    }
> +
> +  return false;
> +}
> +
>  /* We found an split_point CURRENT.  NON_SSA_VARS is bitmap of all non ssa
>    variables used and RETURN_BB is return basic block.
>    See if we can split function here.  */
> @@ -411,6 +458,18 @@ consider_split (struct split_point *current, bitma
>                 "  Refused: split part has non-ssa uses\n");
>       return;
>     }
> +
> +  /* If the split point is dominated by a forbidden function call,
> +     reject the split.  */
> +  if (!bitmap_empty_p (forbidden_dominators)
> +      && dominated_by_forbidden (current->entry_bb))
> +    {
> +      if (dump_file && (dump_flags & TDF_DETAILS))
> +       fprintf (dump_file,
> +                "  Refused: split point dominated by forbidden call\n");
> +      return;
> +    }
> +
>   /* See if retval used by return bb is computed by header or split part.
>      When it is computed by split part, we need to produce return statement
>      in the split part and add code to header to pass it around.
> @@ -1329,6 +1388,10 @@ execute_split_functions (void)
>       return 0;
>     }
>
> +  /* Initialize bitmap to track forbidden calls.  */
> +  forbidden_dominators = BITMAP_ALLOC (NULL);
> +  calculate_dominance_info (CDI_DOMINATORS);
> +
>   /* Compute local info about basic blocks and determine function size/time.  */
>   VEC_safe_grow_cleared (bb_info, heap, bb_info_vec, last_basic_block + 1);
>   memset (&best_split_point, 0, sizeof (best_split_point));
> @@ -1350,6 +1413,7 @@ execute_split_functions (void)
>          this_time = estimate_num_insns (stmt, &eni_time_weights) * freq;
>          size += this_size;
>          time += this_time;
> +         check_forbidden_calls (stmt, bb);
>
>          if (dump_file && (dump_flags & TDF_DETAILS))
>            {
> @@ -1371,6 +1435,7 @@ execute_split_functions (void)
>       BITMAP_FREE (best_split_point.split_bbs);
>       todo = TODO_update_ssa | TODO_cleanup_cfg;
>     }
> +  BITMAP_FREE (forbidden_dominators);
>   VEC_free (bb_info, heap, bb_info_vec);
>   bb_info_vec = NULL;
>   return todo;
>
>
>
Bill Schmidt Jan. 10, 2012, 3:42 p.m. UTC | #2
On Tue, 2012-01-10 at 14:53 +0100, Richard Guenther wrote:
> On Tue, Jan 10, 2012 at 2:43 PM, William J. Schmidt
> <wschmidt@linux.vnet.ibm.com> wrote:
> > Greetings,
> >
> > This patch follows Richard Guenther's suggestion of 2011-07-05 in
> > http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49642 to fix the problem in
> > gcc 4.6.  It prevents choosing a function split point that is dominated
> > by a builtin call to __builtin_constant_p.
> >
> > The bug was marked fixed in 4.7 since the extra FRE pass allows the
> > correct optimization to be done even in the presence of
> > __builtin_constant_p.  However, 4.7 still fails in the presence of
> > -fno-tree-fre.  I think we should probably include a variation of this
> > patch in 4.7 that only kicks in when FRE has been disabled at the
> > command line.  The test case would also be modified slightly to include
> > -fno-tree-fre in the dg-compile statement.  Thoughts?
> 
> I think it should unconditionally restrict splitting (I suppose on the
> trunk the __builtin_constant_p is optimized away already).

OK.  Yes, on trunk it is optimized away (when FRE is not disabled).
Having the logic unconditional is fine with me; I'd like to use
-fno-tree-fre in the test case so it actually gets tested, though.  Or
have two variants, one with, one without.

> 
> Btw, this will also disqualify any point below
> 
>  if (__builtin_constant_p (...))
>    {
>      ...
>    }
> 
> because after the if join all BBs are dominated by the __builtin_constant_p
> call.  What we want to disallow is splitting at a block that is dominated
> by the true edge of the condition fed by the __builtin_constant_p result ...

True.  What we have is:

  D.1899_68 = __builtin_constant_p (D.1898_67);
  if (D.1899_68 != 0)
    goto <bb 3>;
  else
    goto <bb 133>;

So I suppose we have to walk the immediate uses of the LHS of the call,
find all that are part of a condition, and mark the target block for
nonzero (in this case <bb 3>) as a forbidden dominator.  I can tighten
this up.

> 
> Honza?
> 
> > The 4.6 patch was bootstrapped and tests cleanly on powerpc64-linux-gnu.
> > OK for 4.6 branch?
> >
> > Thanks,
> > Bill
> >
> >
> > gcc:
> >
> > 2012-01-10  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
> >
> >        PR tree-optimization/49642
> >        * ipa-split.c (forbidden_dominators): New variable.
> >        (check_forbidden_calls): New function.
> >        (dominated_by_forbidden): Likewise.
> >        (consider_split): Check for forbidden calls.
> >        (execute_split_functions): Initialize and free forbidden
> >        dominators info; call check_forbidden_calls.
> >
> > gcc/testsuite:
> >
> > 2012-01-10  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
> >
> >        PR tree-optimization/49642
> >        * gcc.dg/tree-ssa/pr49642.c: New test.
> >
> >
> > Index: gcc/testsuite/gcc.dg/tree-ssa/pr49642.c
> > ===================================================================
> > --- gcc/testsuite/gcc.dg/tree-ssa/pr49642.c     (revision 0)
> > +++ gcc/testsuite/gcc.dg/tree-ssa/pr49642.c     (revision 0)
> > @@ -0,0 +1,49 @@
> > +/* Verify that ipa-split is disabled following __builtin_constant_p.  */
> > +
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -fdump-tree-optimized" } */
> > +
> > +typedef unsigned int u32;
> > +typedef unsigned long long u64;
> > +
> > +static inline __attribute__((always_inline)) __attribute__((const))
> > +int __ilog2_u32(u32 n)
> > +{
> > + int bit;
> > + asm ("cntlzw %0,%1" : "=r" (bit) : "r" (n));
> > + return 31 - bit;
> > +}
> > +
> > +
> > +static inline __attribute__((always_inline)) __attribute__((const))
> > +int __ilog2_u64(u64 n)
> > +{
> > + int bit;
> > + asm ("cntlzd %0,%1" : "=r" (bit) : "r" (n));
> > + return 63 - bit;
> > +}
> > +
> > +
> > +
> > +static u64 ehca_map_vaddr(void *caddr);
> > +
> > +struct ehca_shca {
> > +        u32 hca_cap_mr_pgsize;
> > +};
> > +
> > +static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca)
> > +{
> > + return 1UL << ( __builtin_constant_p(shca->hca_cap_mr_pgsize) ? ( (shca->hca_cap_mr_pgsize) < 1 ? ____ilog2_NaN() : (shca->hca_cap_mr_pgsize) & (1ULL << 63) ? 63 : (shca->hca_cap_mr_pgsize) & (1ULL << 62) ? 62 : (shca->hca_cap_mr_pgsize) & (1ULL << 61) ? 61 : (shca->hca_cap_mr_pgsize) & (1ULL << 60) ? 60 : (shca->hca_cap_mr_pgsize) & (1ULL << 59) ? 59 : (shca->hca_cap_mr_pgsize) & (1ULL << 58) ? 58 : (shca->hca_cap_mr_pgsize) & (1ULL << 57) ? 57 : (shca->hca_cap_mr_pgsize) & (1ULL << 56) ? 56 : (shca->hca_cap_mr_pgsize) & (1ULL << 55) ? 55 : (shca->hca_cap_mr_pgsize) & (1ULL << 54) ? 54 : (shca->hca_cap_mr_pgsize) & (1ULL << 53) ? 53 : (shca->hca_cap_mr_pgsize) & (1ULL << 52) ? 52 : (shca->hca_cap_mr_pgsize) & (1ULL << 51) ? 51 : (shca->hca_cap_mr_pgsize) & (1ULL << 50) ? 50 : (shca->hca_cap_mr_pgsize) & (1ULL << 49) ? 49 : (shca->hca_cap_mr_pgsize) & (1ULL << 48) ? 48 : (shca->hca_cap_mr_pgsize) & (1ULL << 47) ? 47 : (shca->hca_cap_mr_pgsize) & (1ULL << 46) ? 46 : (shca->hca_cap_mr_pgsize) & (1ULL << 45) ? 45 : (shca->hca_cap_mr_pgsize) & (1ULL << 44) ? 44 : (shca->hca_cap_mr_pgsize) & (1ULL << 43) ? 43 : (shca->hca_cap_mr_pgsize) & (1ULL << 42) ? 42 : (shca->hca_cap_mr_pgsize) & (1ULL << 41) ? 41 : (shca->hca_cap_mr_pgsize) & (1ULL << 40) ? 40 : (shca->hca_cap_mr_pgsize) & (1ULL << 39) ? 39 : (shca->hca_cap_mr_pgsize) & (1ULL << 38) ? 38 : (shca->hca_cap_mr_pgsize) & (1ULL << 37) ? 37 : (shca->hca_cap_mr_pgsize) & (1ULL << 36) ? 36 : (shca->hca_cap_mr_pgsize) & (1ULL << 35) ? 35 : (shca->hca_cap_mr_pgsize) & (1ULL << 34) ? 34 : (shca->hca_cap_mr_pgsize) & (1ULL << 33) ? 33 : (shca->hca_cap_mr_pgsize) & (1ULL << 32) ? 32 : (shca->hca_cap_mr_pgsize) & (1ULL << 31) ? 31 : (shca->hca_cap_mr_pgsize) & (1ULL << 30) ? 30 : (shca->hca_cap_mr_pgsize) & (1ULL << 29) ? 29 : (shca->hca_cap_mr_pgsize) & (1ULL << 28) ? 28 : (shca->hca_cap_mr_pgsize) & (1ULL << 27) ? 27 : (shca->hca_cap_mr_pgsize) & (1ULL << 26) ? 26 : (shca->hca_cap_mr_pgsize) & (1ULL << 25) ? 
25 : (shca->hca_cap_mr_pgsize) & (1ULL << 24) ? 24 : (shca->hca_cap_mr_pgsize) & (1ULL << 23) ? 23 : (shca->hca_cap_mr_pgsize) & (1ULL << 22) ? 22 : (shca->hca_cap_mr_pgsize) & (1ULL << 21) ? 21 : (shca->hca_cap_mr_pgsize) & (1ULL << 20) ? 20 : (shca->hca_cap_mr_pgsize) & (1ULL << 19) ? 19 : (shca->hca_cap_mr_pgsize) & (1ULL << 18) ? 18 : (shca->hca_cap_mr_pgsize) & (1ULL << 17) ? 17 : (shca->hca_cap_mr_pgsize) & (1ULL << 16) ? 16 : (shca->hca_cap_mr_pgsize) & (1ULL << 15) ? 15 : (shca->hca_cap_mr_pgsize) & (1ULL << 14) ? 14 : (shca->hca_cap_mr_pgsize) & (1ULL << 13) ? 13 : (shca->hca_cap_mr_pgsize) & (1ULL << 12) ? 12 : (shca->hca_cap_mr_pgsize) & (1ULL << 11) ? 11 : (shca->hca_cap_mr_pgsize) & (1ULL << 10) ? 10 : (shca->hca_cap_mr_pgsize) & (1ULL << 9) ? 9 : (shca->hca_cap_mr_pgsize) & (1ULL << 8) ? 8 : (shca->hca_cap_mr_pgsize) & (1ULL << 7) ? 7 : (shca->hca_cap_mr_pgsize) & (1ULL << 6) ? 6 : (shca->hca_cap_mr_pgsize) & (1ULL << 5) ? 5 : (shca->hca_cap_mr_pgsize) & (1ULL << 4) ? 4 : (shca->hca_cap_mr_pgsize) & (1ULL << 3) ? 3 : (shca->hca_cap_mr_pgsize) & (1ULL << 2) ? 2 : (shca->hca_cap_mr_pgsize) & (1ULL << 1) ? 1 : (shca->hca_cap_mr_pgsize) & (1ULL << 0) ? 0 : ____ilog2_NaN() ) : (sizeof(shca->hca_cap_mr_pgsize) <= 4) ? __ilog2_u32(shca->hca_cap_mr_pgsize) : __ilog2_u64(shca->hca_cap_mr_pgsize) );
> > +}
> > +
> > +int x(struct ehca_shca *shca) {
> > +        return ehca_get_max_hwpage_size(shca);
> > +}
> > +
> > +int y(struct ehca_shca *shca)
> > +{
> > +        return ehca_get_max_hwpage_size(shca);
> > +}
> > +
> > +/* { dg-final { scan-tree-dump-times "____ilog2_NaN" 0 "optimized" } } */
> > +/* { dg-final { cleanup-tree-dump "optimized" } } */
> > Index: gcc/ipa-split.c
> > ===================================================================
> > --- gcc/ipa-split.c     (revision 183033)
> > +++ gcc/ipa-split.c     (working copy)
> > @@ -130,6 +130,10 @@ struct split_point
> >
> >  struct split_point best_split_point;
> >
> > +/* Set of basic blocks that are not allowed to dominate a split point.  */
> > +
> > +bitmap forbidden_dominators;
> > +
> >  static tree find_retval (basic_block return_bb);
> >
> >  /* Callback for walk_stmt_load_store_addr_ops.  If T is non-SSA automatic
> > @@ -270,6 +274,49 @@ done:
> >   return ok;
> >  }
> >
> > +/* If STMT is a call, check the callee against a list of forbidden
> > +   functions.  If a match is found, set the bit for block BB in the
> > +   forbidden dominators bitmap.  The purpose of this is to avoid
> > +   selecting a split point where we are likely to lose the chance
> > +   to optimize away an unused function call.  */
> > +
> > +static void
> > +check_forbidden_calls (gimple stmt, basic_block bb)
> > +{
> > +  tree fndecl;
> > +
> > +  if (!is_gimple_call (stmt))
> > +    return;
> > +
> > +  fndecl = gimple_call_fndecl (stmt);
> > +
> > +  if (fndecl
> > +      && TREE_CODE (fndecl) == FUNCTION_DECL
> > +      && DECL_BUILT_IN (fndecl)
> > +      /* At the moment, __builtin_constant_p is the only forbidden
> > +        function call (see PR49642).  */
> > +      && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CONSTANT_P)
> > +    bitmap_set_bit (forbidden_dominators, bb->index);
> > +}
> > +
> > +/* If BB is dominated by any block in the forbidden dominators set,
> > +   return TRUE; else FALSE.  */
> > +
> > +static bool
> > +dominated_by_forbidden (basic_block bb)
> > +{
> > +  unsigned dom_bb;
> > +  bitmap_iterator bi;
> > +
> > +  EXECUTE_IF_SET_IN_BITMAP (forbidden_dominators, 1, dom_bb, bi)
> > +    {
> > +      if (dominated_by_p (CDI_DOMINATORS, bb, BASIC_BLOCK (dom_bb)))
> > +       return true;
> > +    }
> > +
> > +  return false;
> > +}
> > +
> >  /* We found an split_point CURRENT.  NON_SSA_VARS is bitmap of all non ssa
> >    variables used and RETURN_BB is return basic block.
> >    See if we can split function here.  */
> > @@ -411,6 +458,18 @@ consider_split (struct split_point *current, bitma
> >                 "  Refused: split part has non-ssa uses\n");
> >       return;
> >     }
> > +
> > +  /* If the split point is dominated by a forbidden function call,
> > +     reject the split.  */
> > +  if (!bitmap_empty_p (forbidden_dominators)
> > +      && dominated_by_forbidden (current->entry_bb))
> > +    {
> > +      if (dump_file && (dump_flags & TDF_DETAILS))
> > +       fprintf (dump_file,
> > +                "  Refused: split point dominated by forbidden call\n");
> > +      return;
> > +    }
> > +
> >   /* See if retval used by return bb is computed by header or split part.
> >      When it is computed by split part, we need to produce return statement
> >      in the split part and add code to header to pass it around.
> > @@ -1329,6 +1388,10 @@ execute_split_functions (void)
> >       return 0;
> >     }
> >
> > +  /* Initialize bitmap to track forbidden calls.  */
> > +  forbidden_dominators = BITMAP_ALLOC (NULL);
> > +  calculate_dominance_info (CDI_DOMINATORS);
> > +
> >   /* Compute local info about basic blocks and determine function size/time.  */
> >   VEC_safe_grow_cleared (bb_info, heap, bb_info_vec, last_basic_block + 1);
> >   memset (&best_split_point, 0, sizeof (best_split_point));
> > @@ -1350,6 +1413,7 @@ execute_split_functions (void)
> >          this_time = estimate_num_insns (stmt, &eni_time_weights) * freq;
> >          size += this_size;
> >          time += this_time;
> > +         check_forbidden_calls (stmt, bb);
> >
> >          if (dump_file && (dump_flags & TDF_DETAILS))
> >            {
> > @@ -1371,6 +1435,7 @@ execute_split_functions (void)
> >       BITMAP_FREE (best_split_point.split_bbs);
> >       todo = TODO_update_ssa | TODO_cleanup_cfg;
> >     }
> > +  BITMAP_FREE (forbidden_dominators);
> >   VEC_free (bb_info, heap, bb_info_vec);
> >   bb_info_vec = NULL;
> >   return todo;
> >
> >
> >
>
Jan Hubicka Jan. 11, 2012, 11:40 a.m. UTC | #3
> I think it should unconditionally restrict splitting (I suppose on the
> trunk the __builtin_constant_p is optimized away already).
> 
> Btw, this will also disqualify any point below
> 
>  if (__builtin_constant_p (...))
>    {
>      ...
>    }
> 
> because after the if join all BBs are dominated by the __builtin_constant_p
> call.  What we want to disallow is splitting at a block that is dominated
> by the true edge of the condition fed by the __builtin_constant_p result ....
> 
> Honza?
Well, just for the record, the final version of the patch seems to make sense to me ;)
Thanks! It is an interesting corner case, to say the least.

Honza
Jan Hubicka Jan. 11, 2012, 11:44 a.m. UTC | #4
> Well, just for the record, the final version of the patch seems to make sense to me ;)
> Thanks! It is an interesting corner case, to say the least.

Of course one could craft a function with two builtin_constant_p calls and
an asm statement that is not dominated by either of them but is still always constant.
I am not sure how far we want to go in guaranteeing that constant propagation
happens.  I think the simple dominator cases are enough in practice.

Honza
> 
> Honza
diff mbox

Patch

Index: gcc/testsuite/gcc.dg/tree-ssa/pr49642.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/pr49642.c     (revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/pr49642.c     (revision 0)
@@ -0,0 +1,49 @@ 
+/* Verify that ipa-split is disabled following __builtin_constant_p.  */
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+typedef unsigned int u32;
+typedef unsigned long long u64;
+
+static inline __attribute__((always_inline)) __attribute__((const))
+int __ilog2_u32(u32 n)
+{
+ int bit;
+ asm ("cntlzw %0,%1" : "=r" (bit) : "r" (n));
+ return 31 - bit;
+}
+
+
+static inline __attribute__((always_inline)) __attribute__((const))
+int __ilog2_u64(u64 n)
+{
+ int bit;
+ asm ("cntlzd %0,%1" : "=r" (bit) : "r" (n));
+ return 63 - bit;
+}
+
+
+
+static u64 ehca_map_vaddr(void *caddr);
+
+struct ehca_shca {
+        u32 hca_cap_mr_pgsize;
+};
+
+static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca)
+{
+ return 1UL << ( __builtin_constant_p(shca->hca_cap_mr_pgsize) ? ( (shca->hca_cap_mr_pgsize) < 1 ? ____ilog2_NaN() : (shca->hca_cap_mr_pgsize) & (1ULL << 63) ? 63 : (shca->hca_cap_mr_pgsize) & (1ULL << 62) ? 62 : (shca->hca_cap_mr_pgsize) & (1ULL << 61) ? 61 : (shca->hca_cap_mr_pgsize) & (1ULL << 60) ? 60 : (shca->hca_cap_mr_pgsize) & (1ULL << 59) ? 59 : (shca->hca_cap_mr_pgsize) & (1ULL << 58) ? 58 : (shca->hca_cap_mr_pgsize) & (1ULL << 57) ? 57 : (shca->hca_cap_mr_pgsize) & (1ULL << 56) ? 56 : (shca->hca_cap_mr_pgsize) & (1ULL << 55) ? 55 : (shca->hca_cap_mr_pgsize) & (1ULL << 54) ? 54 : (shca->hca_cap_mr_pgsize) & (1ULL << 53) ? 53 : (shca->hca_cap_mr_pgsize) & (1ULL << 52) ? 52 : (shca->hca_cap_mr_pgsize) & (1ULL << 51) ? 51 : (shca->hca_cap_mr_pgsize) & (1ULL << 50) ? 50 : (shca->hca_cap_mr_pgsize) & (1ULL << 49) ? 49 : (shca->hca_cap_mr_pgsize) & (1ULL << 48) ? 48 : (shca->hca_cap_mr_pgsize) & (1ULL << 47) ? 47 : (shca->hca_cap_mr_pgsize) & (1ULL << 46) ? 46 : (shca->hca_cap_mr_pgsize) & (1ULL << 45) ? 45 : (shca->hca_cap_mr_pgsize) & (1ULL << 44) ? 44 : (shca->hca_cap_mr_pgsize) & (1ULL << 43) ? 43 : (shca->hca_cap_mr_pgsize) & (1ULL << 42) ? 42 : (shca->hca_cap_mr_pgsize) & (1ULL << 41) ? 41 : (shca->hca_cap_mr_pgsize) & (1ULL << 40) ? 40 : (shca->hca_cap_mr_pgsize) & (1ULL << 39) ? 39 : (shca->hca_cap_mr_pgsize) & (1ULL << 38) ? 38 : (shca->hca_cap_mr_pgsize) & (1ULL << 37) ? 37 : (shca->hca_cap_mr_pgsize) & (1ULL << 36) ? 36 : (shca->hca_cap_mr_pgsize) & (1ULL << 35) ? 35 : (shca->hca_cap_mr_pgsize) & (1ULL << 34) ? 34 : (shca->hca_cap_mr_pgsize) & (1ULL << 33) ? 33 : (shca->hca_cap_mr_pgsize) & (1ULL << 32) ? 32 : (shca->hca_cap_mr_pgsize) & (1ULL << 31) ? 31 : (shca->hca_cap_mr_pgsize) & (1ULL << 30) ? 30 : (shca->hca_cap_mr_pgsize) & (1ULL << 29) ? 29 : (shca->hca_cap_mr_pgsize) & (1ULL << 28) ? 28 : (shca->hca_cap_mr_pgsize) & (1ULL << 27) ? 27 : (shca->hca_cap_mr_pgsize) & (1ULL << 26) ? 26 : (shca->hca_cap_mr_pgsize) & (1ULL << 25) ? 
25 : (shca->hca_cap_mr_pgsize) & (1ULL << 24) ? 24 : (shca->hca_cap_mr_pgsize) & (1ULL << 23) ? 23 : (shca->hca_cap_mr_pgsize) & (1ULL << 22) ? 22 : (shca->hca_cap_mr_pgsize) & (1ULL << 21) ? 21 : (shca->hca_cap_mr_pgsize) & (1ULL << 20) ? 20 : (shca->hca_cap_mr_pgsize) & (1ULL << 19) ? 19 : (shca->hca_cap_mr_pgsize) & (1ULL << 18) ? 18 : (shca->hca_cap_mr_pgsize) & (1ULL << 17) ? 17 : (shca->hca_cap_mr_pgsize) & (1ULL << 16) ? 16 : (shca->hca_cap_mr_pgsize) & (1ULL << 15) ? 15 : (shca->hca_cap_mr_pgsize) & (1ULL << 14) ? 14 : (shca->hca_cap_mr_pgsize) & (1ULL << 13) ? 13 : (shca->hca_cap_mr_pgsize) & (1ULL << 12) ? 12 : (shca->hca_cap_mr_pgsize) & (1ULL << 11) ? 11 : (shca->hca_cap_mr_pgsize) & (1ULL << 10) ? 10 : (shca->hca_cap_mr_pgsize) & (1ULL << 9) ? 9 : (shca->hca_cap_mr_pgsize) & (1ULL << 8) ? 8 : (shca->hca_cap_mr_pgsize) & (1ULL << 7) ? 7 : (shca->hca_cap_mr_pgsize) & (1ULL << 6) ? 6 : (shca->hca_cap_mr_pgsize) & (1ULL << 5) ? 5 : (shca->hca_cap_mr_pgsize) & (1ULL << 4) ? 4 : (shca->hca_cap_mr_pgsize) & (1ULL << 3) ? 3 : (shca->hca_cap_mr_pgsize) & (1ULL << 2) ? 2 : (shca->hca_cap_mr_pgsize) & (1ULL << 1) ? 1 : (shca->hca_cap_mr_pgsize) & (1ULL << 0) ? 0 : ____ilog2_NaN() ) : (sizeof(shca->hca_cap_mr_pgsize) <= 4) ? __ilog2_u32(shca->hca_cap_mr_pgsize) : __ilog2_u64(shca->hca_cap_mr_pgsize) );
+}
+
+int x(struct ehca_shca *shca) {
+        return ehca_get_max_hwpage_size(shca);
+}
+
+int y(struct ehca_shca *shca)
+{
+        return ehca_get_max_hwpage_size(shca);
+}
+
+/* { dg-final { scan-tree-dump-times "____ilog2_NaN" 0 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
Index: gcc/ipa-split.c
===================================================================
--- gcc/ipa-split.c     (revision 183033)
+++ gcc/ipa-split.c     (working copy)
@@ -130,6 +130,10 @@  struct split_point
 
 struct split_point best_split_point;
 
+/* Set of basic blocks that are not allowed to dominate a split point.  */
+
+bitmap forbidden_dominators;
+
 static tree find_retval (basic_block return_bb);
 
 /* Callback for walk_stmt_load_store_addr_ops.  If T is non-SSA automatic
@@ -270,6 +274,49 @@  done:
   return ok;
 }
 
+/* If STMT is a call, check the callee against a list of forbidden
+   functions.  If a match is found, set the bit for block BB in the
+   forbidden dominators bitmap.  The purpose of this is to avoid
+   selecting a split point where we are likely to lose the chance
+   to optimize away an unused function call.  */
+
+static void
+check_forbidden_calls (gimple stmt, basic_block bb)
+{
+  tree fndecl;
+
+  if (!is_gimple_call (stmt))
+    return;
+
+  fndecl = gimple_call_fndecl (stmt);
+
+  if (fndecl
+      && TREE_CODE (fndecl) == FUNCTION_DECL
+      && DECL_BUILT_IN (fndecl)
+      /* At the moment, __builtin_constant_p is the only forbidden
+        function call (see PR49642).  */
+      && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CONSTANT_P)
+    bitmap_set_bit (forbidden_dominators, bb->index);
+}
+
+/* If BB is dominated by any block in the forbidden dominators set,
+   return TRUE; else FALSE.  */
+
+static bool
+dominated_by_forbidden (basic_block bb)
+{
+  unsigned dom_bb;
+  bitmap_iterator bi;
+
+  EXECUTE_IF_SET_IN_BITMAP (forbidden_dominators, 1, dom_bb, bi)
+    {
+      if (dominated_by_p (CDI_DOMINATORS, bb, BASIC_BLOCK (dom_bb)))
+       return true;
+    }
+
+  return false;
+}
+
 /* We found an split_point CURRENT.  NON_SSA_VARS is bitmap of all non ssa
    variables used and RETURN_BB is return basic block.
    See if we can split function here.  */
@@ -411,6 +458,18 @@  consider_split (struct split_point *current, bitma
                 "  Refused: split part has non-ssa uses\n");
       return;
     }
+
+  /* If the split point is dominated by a forbidden function call,
+     reject the split.  */
+  if (!bitmap_empty_p (forbidden_dominators)
+      && dominated_by_forbidden (current->entry_bb))
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+       fprintf (dump_file,
+                "  Refused: split point dominated by forbidden call\n");
+      return;
+    }
+
   /* See if retval used by return bb is computed by header or split part.
      When it is computed by split part, we need to produce return statement
      in the split part and add code to header to pass it around.
@@ -1329,6 +1388,10 @@  execute_split_functions (void)
       return 0;
     }
 
+  /* Initialize bitmap to track forbidden calls.  */
+  forbidden_dominators = BITMAP_ALLOC (NULL);
+  calculate_dominance_info (CDI_DOMINATORS);
+
   /* Compute local info about basic blocks and determine function size/time.  */
   VEC_safe_grow_cleared (bb_info, heap, bb_info_vec, last_basic_block + 1);
   memset (&best_split_point, 0, sizeof (best_split_point));
@@ -1350,6 +1413,7 @@  execute_split_functions (void)
          this_time = estimate_num_insns (stmt, &eni_time_weights) * freq;
          size += this_size;
          time += this_time;
+         check_forbidden_calls (stmt, bb);
 
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
@@ -1371,6 +1435,7 @@  execute_split_functions (void)
       BITMAP_FREE (best_split_point.split_bbs);
       todo = TODO_update_ssa | TODO_cleanup_cfg;
     }
+  BITMAP_FREE (forbidden_dominators);
   VEC_free (bb_info, heap, bb_info_vec);
   bb_info_vec = NULL;
   return todo;