diff mbox series

Find constant definition for by-ref argument using dominance information (PR ipa/90401)

Message ID BYAPR01MB486953A14F5B500F35FEB295F7160@BYAPR01MB4869.prod.exchangelabs.com
State New
Headers show
Series Find constant definition for by-ref argument using dominance information (PR ipa/90401) | expand

Commit Message

Feng Xue OS June 5, 2019, 8:59 a.m. UTC
IPA-CP can not identify a constant by-ref argument to a function, if definition
of the argument is not in same basic block where the callsite lies in. This is
because IPA-CP only does local search in the callsite basic block.So this patch
implemented an enhanced algorithm, which uses dominance information to guide
traverse in virtual SSA web, to find out constant definitions in dominating basic
block.

Feng

---

Comments

Richard Biener June 5, 2019, 10:15 a.m. UTC | #1
On Wed, Jun 5, 2019 at 10:59 AM Feng Xue OS <fxue@os.amperecomputing.com> wrote:
>
> IPA-CP can not identify a constant by-ref argument to a function, if definition
> of the argument is not in same basic block where the callsite lies in. This is
> because IPA-CP only does local search in the callsite basic block.So this patch
> implemented an enhanced algorithm, which uses dominance information to guide
> traverse in virtual SSA web, to find out constant definitions in dominating basic
> block.
>
> Feng
>
> ---
> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
> index 526ed45be89..cc076c337af 100644
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,3 +1,14 @@
> +2019-06-04  Feng Xue  <fxue@os.amperecomputing.com>
> +
> +       PR ipa/90401
> +       * ipa-prop.c (add_to_agg_contents_list): New function.
> +       (clobber_by_agg_contents_list_p): Likewise.
> +       (extract_mem_content): Likewise.
> +       (strictly_dominated_by_ssa_p): Likewise.
> +       (get_place_in_agg_contents_list): Delete.
> +       (determine_known_aggregate_parts): Renamed from
> +       determine_locally_known_aggregate_parts.
> +
>  2019-06-04  Segher Boessenkool  <segher@kernel.crashing.org>
>
>         * config/rs6000/constraints.md (define_register_constraint "wp"):
> diff --git a/gcc/ipa-prop.c b/gcc/ipa-prop.c
> index d86c2f3db55..405cdf7730b 100644
> --- a/gcc/ipa-prop.c
> +++ b/gcc/ipa-prop.c
> @@ -1458,7 +1458,7 @@ get_ssa_def_if_simple_copy (tree rhs)
>    return rhs;
>  }
>
> -/* Simple linked list, describing known contents of an aggregate beforere
> +/* Simple linked list, describing known contents of an aggregate before
>     call.  */
>
>  struct ipa_known_agg_contents_list
> @@ -1471,41 +1471,64 @@ struct ipa_known_agg_contents_list
>    struct ipa_known_agg_contents_list *next;
>  };
>
> -/* Find the proper place in linked list of ipa_known_agg_contents_list
> -   structures where to put a new one with the given LHS_OFFSET and LHS_SIZE,
> -   unless there is a partial overlap, in which case return NULL, or such
> -   element is already there, in which case set *ALREADY_THERE to true.  */
> +/* Add a known content item into a linked list of ipa_known_agg_contents_list,
> +   in which all elements are sorted ascendingly by offset. When ALLOW_DUP is
> +   false, insert the item only if there is no duplicate one (with same offset
> +   and size) in the list. And if the item is added, return true. */
>
> -static struct ipa_known_agg_contents_list **
> -get_place_in_agg_contents_list (struct ipa_known_agg_contents_list **list,
> -                               HOST_WIDE_INT lhs_offset,
> -                               HOST_WIDE_INT lhs_size,
> -                               bool *already_there)
> +static inline bool
> +add_to_agg_contents_list (struct ipa_known_agg_contents_list **plist,
> +                          struct ipa_known_agg_contents_list *item,
> +                          bool allow_dup = true)
>  {
> -  struct ipa_known_agg_contents_list **p = list;
> -  while (*p && (*p)->offset < lhs_offset)
> +  struct ipa_known_agg_contents_list *list = *plist;
> +
> +  for (; list; list = list->next)
>      {
> -      if ((*p)->offset + (*p)->size > lhs_offset)
> -       return NULL;
> -      p = &(*p)->next;
> +      if (list->offset > item->offset)
> +        break;
> +
> +      if (list->offset == item->offset && list->size == item->size
> +          && !allow_dup)
> +        return false;
> +
> +      plist = &list->next;
>      }
>
> -  if (*p && (*p)->offset < lhs_offset + lhs_size)
> +  item->next = list;
> +  *plist = item;
> +  return true;
> +}
> +
> +/* Check whether a given known content is clobbered by certain element in
> +   a linked list of ipa_known_agg_contents_list. A special case is that
> +   we can ignore those constant items completely same as the given item,
> +   that is they have same offset/size/value. */
> +
> +static inline bool
> +clobber_by_agg_contents_list_p (struct ipa_known_agg_contents_list *list,
> +                                struct ipa_known_agg_contents_list *item)
> +{
> +  for (; list; list = list->next)
>      {
> -      if ((*p)->offset == lhs_offset && (*p)->size == lhs_size)
> -       /* We already know this value is subsequently overwritten with
> -          something else.  */
> -       *already_there = true;
> -      else
> -       /* Otherwise this is a partial overlap which we cannot
> -          represent.  */
> -       return NULL;
> +      if (list->offset > item->offset)
> +        return list->offset < item->offset + item->size;
> +
> +      /* For the constant item, we can skip comparison with identical items in
> +         the list, because its content remains unchanged after clobbering. */
> +      if (list->offset == item->offset && list->size == item->size
> +          && list->constant && item->constant
> +          && operand_equal_p (list->constant, item->constant, 0))
> +        continue;
> +
> +      if (list->offset + list->size > item->offset)
> +        return true;
>      }
> -  return p;
> +  return false;
>  }
>
>  /* Build aggregate jump function from LIST, assuming there are exactly
> -   CONST_COUNT constant entries there and that th offset of the passed argument
> +   CONST_COUNT constant entries there and that offset of the passed argument
>     is ARG_OFFSET and store it into JFUNC.  */
>
>  static void
> @@ -1528,6 +1551,66 @@ build_agg_jump_func_from_list (struct ipa_known_agg_contents_list *list,
>      }
>  }
>
> +/* If STMT is a memory store to the object whose address is BASE, extract
> +   information (offset, size, and value) into CONTENT, and return true,
> +   otherwise we conservatively assume the whole object is modified with
> +   unknown content, and return false. CHECK_REF means that access to object
> +   is expected to be in form of MEM_REF expression. */
> +
> +static bool
> +extract_mem_content (gimple *stmt, tree base, bool check_ref,
> +                     struct ipa_known_agg_contents_list *content)
> +{
> +  HOST_WIDE_INT lhs_offset, lhs_size;
> +  tree lhs, rhs, lhs_base;
> +  bool reverse;
> +
> +  if (!gimple_assign_single_p (stmt))
> +    return false;
> +
> +  lhs = gimple_assign_lhs (stmt);
> +  rhs = gimple_assign_rhs1 (stmt);
> +
> +  if (!is_gimple_reg_type (TREE_TYPE (rhs))
> +      || TREE_CODE (lhs) == BIT_FIELD_REF
> +      || contains_bitfld_component_ref_p (lhs))
> +    return false;
> +
> +  lhs_base = get_ref_base_and_extent_hwi (lhs, &lhs_offset,
> +                                          &lhs_size, &reverse);
> +  if (!lhs_base)
> +    return false;
> +
> +  if (check_ref)
> +    {
> +      if (TREE_CODE (lhs_base) != MEM_REF
> +          || TREE_OPERAND (lhs_base, 0) != base
> +          || !integer_zerop (TREE_OPERAND (lhs_base, 1)))
> +        return false;
> +    }
> +  else if (lhs_base != base)
> +    return false;
> +
> +  rhs = get_ssa_def_if_simple_copy (rhs);
> +
> +  content->size = lhs_size;
> +  content->offset = lhs_offset;
> +  content->constant = is_gimple_ip_invariant (rhs) ? rhs : NULL_TREE;
> +  content->next = NULL;
> +
> +  return true;
> +}
> +
> +/* Return true if defintion of SSA name strictly dominates basic block BB. */
> +
> +static inline bool
> +strictly_dominated_by_ssa_p (basic_block bb, tree ssa)
> +{
> +  basic_block ssa_bb = gimple_bb (SSA_NAME_DEF_STMT (ssa));
> +
> +  return bb != ssa_bb && dominated_by_p (CDI_DOMINATORS, bb, ssa_bb);
> +}
> +
>  /* Traverse statements from CALL backwards, scanning whether an aggregate given
>     in ARG is filled in with constant values.  ARG can either be an aggregate
>     expression or a pointer to an aggregate.  ARG_TYPE is the type of the
> @@ -1535,19 +1618,22 @@ build_agg_jump_func_from_list (struct ipa_known_agg_contents_list *list,
>     subsequently stored.  */
>
>  static void
> -determine_locally_known_aggregate_parts (gcall *call, tree arg,
> -                                        tree arg_type,
> -                                        struct ipa_jump_func *jfunc)
> +determine_known_aggregate_parts (gcall *call, tree arg,
> +                                tree arg_type,
> +                                struct ipa_jump_func *jfunc)
>  {
> -  struct ipa_known_agg_contents_list *list = NULL;
> +  struct ipa_known_agg_contents_list *list = NULL, *all_list = NULL;
> +  basic_block call_bb = gimple_bb (call);
> +  hash_set <tree> visited;
> +  auto_vec <tree> worklist;
>    int item_count = 0, const_count = 0;
> +  int ipa_max_agg_items = PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS);
>    HOST_WIDE_INT arg_offset, arg_size;
> -  gimple_stmt_iterator gsi;
>    tree arg_base;
>    bool check_ref, by_ref;
>    ao_ref r;
>
> -  if (PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS) == 0)
> +  if (ipa_max_agg_items == 0)
>      return;
>
>    /* The function operates in three stages.  First, we prepare check_ref, r,
> @@ -1606,82 +1692,139 @@ determine_locally_known_aggregate_parts (gcall *call, tree arg,
>        ao_ref_init (&r, arg);
>      }
>
> -  /* Second stage walks back the BB, looks at individual statements and as long
> -     as it is confident of how the statements affect contents of the
> -     aggregates, it builds a sorted linked list of ipa_agg_jf_list structures
> -     describing it.  */
> -  gsi = gsi_for_stmt (call);
> -  gsi_prev (&gsi);
> -  for (; !gsi_end_p (gsi); gsi_prev (&gsi))
> -    {
> -      struct ipa_known_agg_contents_list *n, **p;
> -      gimple *stmt = gsi_stmt (gsi);
> -      HOST_WIDE_INT lhs_offset, lhs_size;
> -      tree lhs, rhs, lhs_base;
> -      bool reverse;
> -
> -      if (!stmt_may_clobber_ref_p_1 (stmt, &r))
> -       continue;
> -      if (!gimple_assign_single_p (stmt))
> -       break;
> -
> -      lhs = gimple_assign_lhs (stmt);
> -      rhs = gimple_assign_rhs1 (stmt);
> -      if (!is_gimple_reg_type (TREE_TYPE (rhs))
> -         || TREE_CODE (lhs) == BIT_FIELD_REF
> -         || contains_bitfld_component_ref_p (lhs))
> -       break;
> -
> -      lhs_base = get_ref_base_and_extent_hwi (lhs, &lhs_offset,
> -                                             &lhs_size, &reverse);
> -      if (!lhs_base)
> -       break;
> -
> -      if (check_ref)
> -       {
> -         if (TREE_CODE (lhs_base) != MEM_REF
> -             || TREE_OPERAND (lhs_base, 0) != arg_base
> -             || !integer_zerop (TREE_OPERAND (lhs_base, 1)))
> -           break;
> -       }
> -      else if (lhs_base != arg_base)
> -       {
> -         if (DECL_P (lhs_base))
> -           continue;
> -         else
> -           break;
> -       }
> -
> -      bool already_there = false;
> -      p = get_place_in_agg_contents_list (&list, lhs_offset, lhs_size,
> -                                         &already_there);
> -      if (!p)
> -       break;
> -      if (already_there)
> -       continue;
> -
> -      rhs = get_ssa_def_if_simple_copy (rhs);
> -      n = XALLOCA (struct ipa_known_agg_contents_list);
> -      n->size = lhs_size;
> -      n->offset = lhs_offset;
> -      if (is_gimple_ip_invariant (rhs))
> -       {
> -         n->constant = rhs;
> -         const_count++;
> -       }
> -      else
> -       n->constant = NULL_TREE;
> -      n->next = *p;
> -      *p = n;
> -
> -      item_count++;
> -      if (const_count == PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS)
> -         || item_count == 2 * PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS))
> -       break;
> -    }
> +  /* Second stage walks back from the call statement, looks at individual
> +     virtual operand and as long as it is confident of how definition
> +     statement of the virtual operand affects content of the aggregate, it
> +     builds a sorted linked list of ipa_agg_jf_list structures describing it.
> +
> +     Actually, the stage involves kind of global reachability analysis on
> +     in-memory variables definitions. A complete resolution requires iterative
> +     dataflow equation computation, which is somewhat complex and seems to be
> +     overkill to this collecting-constant task.
> +
> +     So here we use a simpler algorithm that we believe can archive nearly
> +     same result in most situations. In the algorithm, we traverse virtual SSA
> +     web backwards starting from the call, by stepping from one dominating
> +     virtual operand (its definition dominates the call) to another dominating
> +     one. Before a dom virtual operand is processed, we will ensure all those
> +     non-dom virtual operands, which are backwards reachable from previous dom
> +     virtual operand, have been processed. But only dom virtual operand is
> +     regarded as possible constant value provider to the call argument. And
> +     non-dom virtual operand is just checked to see whether it kills
> +     definitions of some dom virtual operands. */
> +
> +  for (tree dom_vuse = gimple_vuse (call); dom_vuse; )
> +    {
> +      gimple *stmt = SSA_NAME_DEF_STMT (dom_vuse);
> +      bool exist = visited.add (dom_vuse);
> +
> +      gcc_assert (!exist);
> +
> +      if (gimple_code (stmt) != GIMPLE_PHI)
> +        {
> +          if (stmt_may_clobber_ref_p_1 (stmt, &r))
> +            {
> +              struct ipa_known_agg_contents_list *content =
> +                         XALLOCA (struct ipa_known_agg_contents_list);
> +
> +              if (!extract_mem_content (stmt, arg_base, check_ref, content))
> +                break;
> +
> +              /* Now we get a dom virtual operand, and need to check whether
> +                 its value can finally reach the call. And put it into the
> +                 content list only if it is unique. */
> +              if (!clobber_by_agg_contents_list_p (all_list, content)
> +                  && add_to_agg_contents_list (&list, content, false))
> +                {
> +                  struct ipa_known_agg_contents_list *copy =
> +                             XALLOCA (struct ipa_known_agg_contents_list);
> +
> +                  *copy = *content;
> +                  content = copy;
> +
> +                  item_count +=1;
> +                  const_count += !!(content->constant);
> +
> +                  if (item_count == 2 * ipa_max_agg_items
> +                      || const_count == ipa_max_agg_items)
> +                    break;
> +                }
> +              add_to_agg_contents_list (&all_list, content);
> +            }
> +          dom_vuse = gimple_vuse (stmt);
> +          continue;
> +        }
> +
> +      /* We get to a merge point in virtual SSA web, then go through all
> +         non-dom virtual operands before handling next dom virtual operand. */
> +      worklist.safe_push (dom_vuse);
> +      dom_vuse = NULL_TREE;
> +
> +      do
> +        {
> +          tree vuse = worklist.pop ();
> +          gimple *stmt = SSA_NAME_DEF_STMT (vuse);
> +          auto_vec<tree, 6> append;
> +
> +          /* Non-dom virtual operand should not be the DEFAULT definition. */
> +          gcc_assert (!SSA_NAME_IS_DEFAULT_DEF (vuse));
> +
> +          if (gimple_code (stmt) != GIMPLE_PHI)
> +            {
> +              if (stmt_may_clobber_ref_p_1 (stmt, &r))
> +                {
> +                  struct ipa_known_agg_contents_list *content =
> +                             XALLOCA (struct ipa_known_agg_contents_list);
> +
> +                  if (!extract_mem_content (stmt, arg_base, check_ref, content))
> +                    goto finish;
> +
> +                  add_to_agg_contents_list (&all_list, content);

I don't think your PHI handling works correct.  If you have

   agg.part1 = 0;
   if ()
     agg.part2 = 1;
   call (agg);

then you seem to end up above for agg.part2 because you push that onto the
worklist below.

> +                }
> +              append.safe_push (gimple_vuse (stmt));
> +            }
> +          else
> +            {
> +              for (unsigned i = 0; i < gimple_phi_num_args (stmt); ++i)
> +                {
> +                  tree phi_arg = gimple_phi_arg_def (stmt, i);
> +
> +                  if (SSA_NAME_IS_DEFAULT_DEF (phi_arg))
> +                    goto finish;
> +
> +                  append.safe_push (phi_arg);

Not sure why you first push to append and then move parts to the
worklist below - it seems to only complicate code.

What you want to do is (probably) use get_continuation_for_phi
which for a virtual PHI node and a reference computes a
virtual operand you can continue your processing with.  Thus sth
like

   bitmap vistied = NULL;
   dom_vuse = get_continuation_for_phi (stmt, &ref, counter /* limit walking */,
  &visited, false, NULL, NULL);
  if (visited)
    BITMAP_FREE (visited);

and have ref be

  ao_ref ref = ao_ref_init (arg);

where you then can remove the worklist alltogether.

You need to limit work you want to do, otherwise it becomes quite
expensive - assign an overall limit of alias queries, get_continuation_for_phi
will decrement counter for each query it does and return NULL if
counter reaches zero.  Each extract_mem_content call should be
accounted as well.

Richard.

> +                }
> +            }
> +
> +          for (unsigned i = 0; i < append.length (); ++i)
> +            {
> +              if (visited.add (vuse = append[i]))
> +                continue;
> +
> +              if (SSA_NAME_IS_DEFAULT_DEF (vuse)
> +                  || strictly_dominated_by_ssa_p (call_bb, vuse))
> +                {
> +                  /* Found a new dom virtual operand, stop going further until
> +                     all pending non-dom virtual operands are processed. */
> +                  gcc_assert (!dom_vuse);
> +                  dom_vuse = vuse;
> +                }
> +              else
> +                worklist.safe_push (vuse);
> +            }
> +        } while (!worklist.is_empty ());
> +
> +      gcc_assert (dom_vuse);
> +
> +      /* It is asserted that unprocessed dom virtual operand should be in
> +         non-visited state. */
> +      visited.remove (dom_vuse);
> +    }
> +
> +finish:
>
>    /* Third stage just goes over the list and creates an appropriate vector of
> -     ipa_agg_jf_item structures out of it, of sourse only if there are
> +     ipa_agg_jf_item structures out of it, of course only if there are
>       any known constants to begin with.  */
>
>    if (const_count)
> @@ -1691,6 +1834,7 @@ determine_locally_known_aggregate_parts (gcall *call, tree arg,
>      }
>  }
>
> +
>  /* Return the Ith param type of callee associated with call graph
>     edge E.  */
>
> @@ -1797,7 +1941,7 @@ ipa_set_jfunc_vr (ipa_jump_func *jf, enum value_range_kind type,
>    jf->m_vr = ipa_get_value_range (type, min, max);
>  }
>
> -/* Assign to JF a pointer to a value_range just liek TMP but either fetch a
> +/* Assign to JF a pointer to a value_range just like TMP but either fetch a
>     copy from ipa_vr_hash_table or allocate a new on in GC memory.  */
>
>  static void
> @@ -1978,7 +2122,7 @@ ipa_compute_jump_functions_for_edge (struct ipa_func_body_info *fbi,
>               || !ipa_get_jf_ancestor_agg_preserved (jfunc))
>           && (AGGREGATE_TYPE_P (TREE_TYPE (arg))
>               || POINTER_TYPE_P (param_type)))
> -       determine_locally_known_aggregate_parts (call, arg, param_type, jfunc);
> +       determine_known_aggregate_parts (call, arg, param_type, jfunc);
>      }
>    if (!useful_context)
>      vec_free (args->polymorphic_call_contexts);
> diff --git a/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c b/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c
> new file mode 100644
> index 00000000000..131c4e265bb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c
> @@ -0,0 +1,78 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -fdump-ipa-cp-details -fno-inline" } */
> +
> +int data1;
> +
> +int callee1(int *v)
> +{
> +  if (*v < 2)
> +    return 0;
> +  else
> +    {
> +      int t = data1;
> +
> +      data1 = *v;
> +      *v = t;
> +
> +      return 1;
> +    }
> +}
> +
> +int __attribute((pure)) callee2(int *v)
> +{
> +  if (*v < 2)
> +    return 0;
> +  else
> +    {
> +      data1 = v[0] + v[2];
> +
> +      return 1;
> +    }
> +}
> +
> +int caller1(int c, int *r)
> +{
> +  int a = 1;
> +
> +  if (c)
> +    return callee1(&a);
> +  else
> +    {
> +      *r = 2;
> +      return callee1(r);
> +    }
> +}
> +
> +int data2[200];
> +int data3;
> +
> +int __attribute((const)) gen_cond(int);
> +
> +int caller2(void)
> +{
> +  int i, j;
> +  int sum = 0;
> +  int a[8];
> +
> +  a[0] = 3;
> +  for (i = 0; i < 100; i++)
> +    {
> +      if (gen_cond (i))
> +        continue;
> +
> +      a[2] = 4;
> +      for (j = 0; j < 100; j++)
> +        {
> +          data2[i + j] = (i ^ j) + data3;
> +
> +          sum += callee2(a);
> +        }
> +    }
> +
> +  return sum;
> +}
> +
> +/* { dg-final { scan-ipa-dump-times "offset: 0, cst: 1" 1 "cp" } } */
> +/* { dg-final { scan-ipa-dump-times "offset: 0, cst: 2" 1 "cp" } } */
> +/* { dg-final { scan-ipa-dump-times "offset: 0, cst: 3" 1 "cp" } } */
> +/* { dg-final { scan-ipa-dump-times "offset: 64, cst: 4" 1 "cp" } } */
>
Feng Xue OS June 6, 2019, 8:54 a.m. UTC | #2
> I don't think your PHI handling works correct.  If you have

>    agg.part1 = 0;
>    if ()
>      agg.part2 = 1;
>    call (agg);

> then you seem to end up above for agg.part2 because you push that onto the
> worklist below.

It is correct.

o. worklist is used to collect all non-dom virtual operands.  agg.part2 is collected
into worklist, after it is taken from worklist, and processed,  we find the next dom 
virtual operands agg.part1.
    [the statement "dom_vuse = vuse"]. 

o. Execution transfers to the start of main loop, which processes dom virtual
operands, and does overlap analysis between agg.part1 and agg.part2. 
    [the statement if (!clobber_by_agg_contents_list_p())]

>> +                }
>> +              append.safe_push (gimple_vuse (stmt));
>> +            }
>> +          else
>> +            {
>> +              for (unsigned i = 0; i < gimple_phi_num_args (stmt); ++i)
>> +                {
>> +                  tree phi_arg = gimple_phi_arg_def (stmt, i);
>> +
>> +                  if (SSA_NAME_IS_DEFAULT_DEF (phi_arg))
>> +                    goto finish;
>> +
>> +                  append.safe_push (phi_arg);

> Not sure why you first push to append and then move parts to the
> worklist below - it seems to only complicate code.

It is just a code trick. I do not want to duplicate below codes for both
normal virtual SSA and PHI virtual SSA.

>> +            {
>> +              if (visited.add (vuse = append[i]))
>> +                continue;
>> +
>> +              if (SSA_NAME_IS_DEFAULT_DEF (vuse)
>> +                  || strictly_dominated_by_ssa_p (call_bb, vuse))
>> +                {
>> +                  /* Found a new dom virtual operand, stop going further until
>> +                     all pending non-dom virtual operands are processed. */
>> +                  gcc_assert (!dom_vuse);
>> +                  dom_vuse = vuse;
>> +                }
>> +              else
>> +                worklist.safe_push (vuse);
>> +            }


> What you want to do is (probably) use get_continuation_for_phi
> which for a virtual PHI node and a reference computes a
> virtual operand you can continue your processing with.  Thus sth
> like

I looked though the function. Yes, it does nearly same thing as this patch requires.
But a minor difference,  get_continuation_for_phi() does not translate virtual
operands in back edge.  Then, if rewriting with the function, we can not handle the
following case.

    /* assume no overlap between agg.part1 and agg.part2 */

    __attribute__((pure)) call();
    agg.part1 = 0;
    for (...)
      {
        call(agg);
        agg.part2 = 1;
      }

No sure this restrict is to prevent revisit the start virtual SSA or due to
some other consideration.

Thanks for comments.

Feng
Richard Biener June 6, 2019, 9:34 a.m. UTC | #3
On Thu, Jun 6, 2019 at 10:54 AM Feng Xue OS <fxue@os.amperecomputing.com> wrote:
>
> > I don't think your PHI handling works correct.  If you have
>
> >    agg.part1 = 0;
> >    if ()
> >      agg.part2 = 1;
> >    call (agg);
>
> > then you seem to end up above for agg.part2 because you push that onto the
> > worklist below.
>
> It is correct.
>
> o. worklist is used to collect all non-dom virtual operands.  agg.part2 is collected
> into worklist, after it is taken from worklist, and processed,  we find the next dom
> virtual operands agg.part1.
>     [the statement "dom_vuse = vuse"].
>
> o. Execution transfers to the start of main loop, which processes dom virtual
> operands, and does overlap analysis between agg.part1 and agg.part2.
>     [the statement if (!clobber_by_agg_contents_list_p())]
>
> >> +                }
> >> +              append.safe_push (gimple_vuse (stmt));
> >> +            }
> >> +          else
> >> +            {
> >> +              for (unsigned i = 0; i < gimple_phi_num_args (stmt); ++i)
> >> +                {
> >> +                  tree phi_arg = gimple_phi_arg_def (stmt, i);
> >> +
> >> +                  if (SSA_NAME_IS_DEFAULT_DEF (phi_arg))
> >> +                    goto finish;
> >> +
> >> +                  append.safe_push (phi_arg);
>
> > Not sure why you first push to append and then move parts to the
> > worklist below - it seems to only complicate code.
>
> It is just a code trick. I do not want to duplicate below codes for both
> normal virtual SSA and PHI virtual SSA.
>
> >> +            {
> >> +              if (visited.add (vuse = append[i]))
> >> +                continue;
> >> +
> >> +              if (SSA_NAME_IS_DEFAULT_DEF (vuse)
> >> +                  || strictly_dominated_by_ssa_p (call_bb, vuse))
> >> +                {
> >> +                  /* Found a new dom virtual operand, stop going further until
> >> +                     all pending non-dom virtual operands are processed. */
> >> +                  gcc_assert (!dom_vuse);
> >> +                  dom_vuse = vuse;
> >> +                }
> >> +              else
> >> +                worklist.safe_push (vuse);
> >> +            }
>
>
> > What you want to do is (probably) use get_continuation_for_phi
> > which for a virtual PHI node and a reference computes a
> > virtual operand you can continue your processing with.  Thus sth
> > like
>
> I looked though the function. Yes, it does nearly same thing as this patch requires.
> But a minor difference,  get_continuation_for_phi() does not translate virtual
> operands in back edge.  Then, if rewriting with the function, we can not handle the
> following case.
>
>     /* assume no overlap between agg.part1 and agg.part2 */
>
>     __attribute__((pure)) call();
>     agg.part1 = 0;
>     for (...)
>       {
>         call(agg);
>         agg.part2 = 1;
>       }
>
> No sure this restrict is to prevent revisit the start virtual SSA or due to
> some other consideration.

It certainly can handle this situation, you do not need the "translate" hook.

Richard.

>
> Thanks for comments.
>
> Feng
diff mbox series

Patch

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 526ed45be89..cc076c337af 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,14 @@ 
+2019-06-04  Feng Xue  <fxue@os.amperecomputing.com>
+
+	PR ipa/90401
+	* ipa-prop.c (add_to_agg_contents_list): New function.
+	(clobber_by_agg_contents_list_p): Likewise.
+	(extract_mem_content): Likewise.
+	(strictly_dominated_by_ssa_p): Likewise.
+	(get_place_in_agg_contents_list): Delete.
+	(determine_known_aggregate_parts): Renamed from
+	determine_locally_known_aggregate_parts.
+
 2019-06-04  Segher Boessenkool  <segher@kernel.crashing.org>
 
 	* config/rs6000/constraints.md (define_register_constraint "wp"):
diff --git a/gcc/ipa-prop.c b/gcc/ipa-prop.c
index d86c2f3db55..405cdf7730b 100644
--- a/gcc/ipa-prop.c
+++ b/gcc/ipa-prop.c
@@ -1458,7 +1458,7 @@  get_ssa_def_if_simple_copy (tree rhs)
   return rhs;
 }
 
-/* Simple linked list, describing known contents of an aggregate beforere
+/* Simple linked list, describing known contents of an aggregate before
    call.  */
 
 struct ipa_known_agg_contents_list
@@ -1471,41 +1471,64 @@  struct ipa_known_agg_contents_list
   struct ipa_known_agg_contents_list *next;
 };
 
-/* Find the proper place in linked list of ipa_known_agg_contents_list
-   structures where to put a new one with the given LHS_OFFSET and LHS_SIZE,
-   unless there is a partial overlap, in which case return NULL, or such
-   element is already there, in which case set *ALREADY_THERE to true.  */
+/* Add a known content item into a linked list of ipa_known_agg_contents_list,
+   in which all elements are sorted ascendingly by offset. When ALLOW_DUP is
+   false, insert the item only if there is no duplicate one (with same offset
+   and size) in the list. And if the item is added, return true. */
 
-static struct ipa_known_agg_contents_list **
-get_place_in_agg_contents_list (struct ipa_known_agg_contents_list **list,
-				HOST_WIDE_INT lhs_offset,
-				HOST_WIDE_INT lhs_size,
-				bool *already_there)
+static inline bool
+add_to_agg_contents_list (struct ipa_known_agg_contents_list **plist,
+                          struct ipa_known_agg_contents_list *item,
+                          bool allow_dup = true)
 {
-  struct ipa_known_agg_contents_list **p = list;
-  while (*p && (*p)->offset < lhs_offset)
+  struct ipa_known_agg_contents_list *list = *plist;
+
+  for (; list; list = list->next)
     {
-      if ((*p)->offset + (*p)->size > lhs_offset)
-	return NULL;
-      p = &(*p)->next;
+      if (list->offset > item->offset)
+        break;
+
+      if (list->offset == item->offset && list->size == item->size
+          && !allow_dup)
+        return false;
+
+      plist = &list->next;
     }
 
-  if (*p && (*p)->offset < lhs_offset + lhs_size)
+  item->next = list;
+  *plist = item;
+  return true;
+}
+
+/* Check whether a given known content is clobbered by certain element in
+   a linked list of ipa_known_agg_contents_list. A special case is that
+   we can ignore those constant items completely same as the given item,
+   that is they have same offset/size/value. */
+
+static inline bool
+clobber_by_agg_contents_list_p (struct ipa_known_agg_contents_list *list,
+                                struct ipa_known_agg_contents_list *item)
+{
+  for (; list; list = list->next)
     {
-      if ((*p)->offset == lhs_offset && (*p)->size == lhs_size)
-	/* We already know this value is subsequently overwritten with
-	   something else.  */
-	*already_there = true;
-      else
-	/* Otherwise this is a partial overlap which we cannot
-	   represent.  */
-	return NULL;
+      if (list->offset > item->offset)
+        return list->offset < item->offset + item->size;
+
+      /* For the constant item, we can skip comparison with identical items in
+         the list, because its content remains unchanged after clobbering. */
+      if (list->offset == item->offset && list->size == item->size
+          && list->constant && item->constant
+          && operand_equal_p (list->constant, item->constant, 0))
+        continue;
+
+      if (list->offset + list->size > item->offset)
+        return true;
     }
-  return p;
+  return false;
 }
 
 /* Build aggregate jump function from LIST, assuming there are exactly
-   CONST_COUNT constant entries there and that th offset of the passed argument
+   CONST_COUNT constant entries there and that offset of the passed argument
    is ARG_OFFSET and store it into JFUNC.  */
 
 static void
@@ -1528,6 +1551,66 @@  build_agg_jump_func_from_list (struct ipa_known_agg_contents_list *list,
     }
 }
 
+/* If STMT is a memory store to the object whose address is BASE, extract
+   information (offset, size, and value) into CONTENT, and return true,
+   otherwise we conservatively assume the whole object is modified with
+   unknown content, and return false. CHECK_REF means that access to object
+   is expected to be in form of MEM_REF expression. */
+
+static bool
+extract_mem_content (gimple *stmt, tree base, bool check_ref,
+                     struct ipa_known_agg_contents_list *content)
+{
+  HOST_WIDE_INT lhs_offset, lhs_size;
+  tree lhs, rhs, lhs_base;
+  bool reverse;
+
+  if (!gimple_assign_single_p (stmt))
+    return false;
+
+  lhs = gimple_assign_lhs (stmt);
+  rhs = gimple_assign_rhs1 (stmt);
+
+  if (!is_gimple_reg_type (TREE_TYPE (rhs))
+      || TREE_CODE (lhs) == BIT_FIELD_REF
+      || contains_bitfld_component_ref_p (lhs))
+    return false;
+
+  lhs_base = get_ref_base_and_extent_hwi (lhs, &lhs_offset,
+                                          &lhs_size, &reverse);
+  if (!lhs_base)
+    return false;
+
+  if (check_ref)
+    {
+      if (TREE_CODE (lhs_base) != MEM_REF
+          || TREE_OPERAND (lhs_base, 0) != base
+          || !integer_zerop (TREE_OPERAND (lhs_base, 1)))
+        return false;
+    }
+  else if (lhs_base != base)
+    return false;
+
+  rhs = get_ssa_def_if_simple_copy (rhs);
+
+  content->size = lhs_size;
+  content->offset = lhs_offset;
+  content->constant = is_gimple_ip_invariant (rhs) ? rhs : NULL_TREE;
+  content->next = NULL;
+
+  return true;
+}
+
+/* Return true if defintion of SSA name strictly dominates basic block BB. */
+
+static inline bool
+strictly_dominated_by_ssa_p (basic_block bb, tree ssa)
+{
+  basic_block ssa_bb = gimple_bb (SSA_NAME_DEF_STMT (ssa));
+
+  return bb != ssa_bb && dominated_by_p (CDI_DOMINATORS, bb, ssa_bb);
+}
+
 /* Traverse statements from CALL backwards, scanning whether an aggregate given
    in ARG is filled in with constant values.  ARG can either be an aggregate
    expression or a pointer to an aggregate.  ARG_TYPE is the type of the
@@ -1535,19 +1618,22 @@  build_agg_jump_func_from_list (struct ipa_known_agg_contents_list *list,
    subsequently stored.  */
 
 static void
-determine_locally_known_aggregate_parts (gcall *call, tree arg,
-					 tree arg_type,
-					 struct ipa_jump_func *jfunc)
+determine_known_aggregate_parts (gcall *call, tree arg,
+				 tree arg_type,
+				 struct ipa_jump_func *jfunc)
 {
-  struct ipa_known_agg_contents_list *list = NULL;
+  struct ipa_known_agg_contents_list *list = NULL, *all_list = NULL;
+  basic_block call_bb = gimple_bb (call);
+  hash_set <tree> visited;
+  auto_vec <tree> worklist;
   int item_count = 0, const_count = 0;
+  int ipa_max_agg_items = PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS);
   HOST_WIDE_INT arg_offset, arg_size;
-  gimple_stmt_iterator gsi;
   tree arg_base;
   bool check_ref, by_ref;
   ao_ref r;
 
-  if (PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS) == 0)
+  if (ipa_max_agg_items == 0)
     return;
 
   /* The function operates in three stages.  First, we prepare check_ref, r,
@@ -1606,82 +1692,139 @@  determine_locally_known_aggregate_parts (gcall *call, tree arg,
       ao_ref_init (&r, arg);
     }
 
-  /* Second stage walks back the BB, looks at individual statements and as long
-     as it is confident of how the statements affect contents of the
-     aggregates, it builds a sorted linked list of ipa_agg_jf_list structures
-     describing it.  */
-  gsi = gsi_for_stmt (call);
-  gsi_prev (&gsi);
-  for (; !gsi_end_p (gsi); gsi_prev (&gsi))
-    {
-      struct ipa_known_agg_contents_list *n, **p;
-      gimple *stmt = gsi_stmt (gsi);
-      HOST_WIDE_INT lhs_offset, lhs_size;
-      tree lhs, rhs, lhs_base;
-      bool reverse;
-
-      if (!stmt_may_clobber_ref_p_1 (stmt, &r))
-	continue;
-      if (!gimple_assign_single_p (stmt))
-	break;
-
-      lhs = gimple_assign_lhs (stmt);
-      rhs = gimple_assign_rhs1 (stmt);
-      if (!is_gimple_reg_type (TREE_TYPE (rhs))
-	  || TREE_CODE (lhs) == BIT_FIELD_REF
-	  || contains_bitfld_component_ref_p (lhs))
-	break;
-
-      lhs_base = get_ref_base_and_extent_hwi (lhs, &lhs_offset,
-					      &lhs_size, &reverse);
-      if (!lhs_base)
-	break;
-
-      if (check_ref)
-	{
-	  if (TREE_CODE (lhs_base) != MEM_REF
-	      || TREE_OPERAND (lhs_base, 0) != arg_base
-	      || !integer_zerop (TREE_OPERAND (lhs_base, 1)))
-	    break;
-	}
-      else if (lhs_base != arg_base)
-	{
-	  if (DECL_P (lhs_base))
-	    continue;
-	  else
-	    break;
-	}
-
-      bool already_there = false;
-      p = get_place_in_agg_contents_list (&list, lhs_offset, lhs_size,
-					  &already_there);
-      if (!p)
-	break;
-      if (already_there)
-	continue;
-
-      rhs = get_ssa_def_if_simple_copy (rhs);
-      n = XALLOCA (struct ipa_known_agg_contents_list);
-      n->size = lhs_size;
-      n->offset = lhs_offset;
-      if (is_gimple_ip_invariant (rhs))
-	{
-	  n->constant = rhs;
-	  const_count++;
-	}
-      else
-	n->constant = NULL_TREE;
-      n->next = *p;
-      *p = n;
-
-      item_count++;
-      if (const_count == PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS)
-	  || item_count == 2 * PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS))
-	break;
-    }
+  /* Second stage walks back from the call statement, looks at individual
+     virtual operand and as long as it is confident of how definition
+     statement of the virtual operand affects content of the aggregate, it
+     builds a sorted linked list of ipa_agg_jf_list structures describing it.
+
+     Actually, the stage involves kind of global reachability analysis on
+     in-memory variables definitions. A complete resolution requires iterative
+     dataflow equation computation, which is somewhat complex and seems to be
+     overkill to this collecting-constant task.
+
+     So here we use a simpler algorithm that we believe can archive nearly
+     same result in most situations. In the algorithm, we traverse virtual SSA
+     web backwards starting from the call, by stepping from one dominating
+     virtual operand (its definition dominates the call) to another dominating
+     one. Before a dom virtual operand is processed, we will ensure all those
+     non-dom virtual operands, which are backwards reachable from previous dom
+     virtual operand, have been processed. But only dom virtual operand is
+     regarded as possible constant value provider to the call argument. And
+     non-dom virtual operand is just checked to see whether it kills
+     definitions of some dom virtual operands. */
+
+  for (tree dom_vuse = gimple_vuse (call); dom_vuse; )
+    {
+      gimple *stmt = SSA_NAME_DEF_STMT (dom_vuse);
+      bool exist = visited.add (dom_vuse);
+
+      gcc_assert (!exist);
+
+      if (gimple_code (stmt) != GIMPLE_PHI)
+        {
+          if (stmt_may_clobber_ref_p_1 (stmt, &r))
+            {
+              struct ipa_known_agg_contents_list *content =
+                         XALLOCA (struct ipa_known_agg_contents_list);
+
+              if (!extract_mem_content (stmt, arg_base, check_ref, content))
+                break;
+
+              /* Now we get a dom virtual operand, and need to check whether
+                 its value can finally reach the call. And put it into the
+                 content list only if it is unique. */
+              if (!clobber_by_agg_contents_list_p (all_list, content)
+                  && add_to_agg_contents_list (&list, content, false))
+                {
+                  struct ipa_known_agg_contents_list *copy =
+                             XALLOCA (struct ipa_known_agg_contents_list);
+
+                  *copy = *content;
+                  content = copy;
+
+                  item_count +=1;
+                  const_count += !!(content->constant);
+
+                  if (item_count == 2 * ipa_max_agg_items
+                      || const_count == ipa_max_agg_items)
+                    break;
+                }
+              add_to_agg_contents_list (&all_list, content);
+            }
+          dom_vuse = gimple_vuse (stmt);
+          continue;
+        }
+
+      /* We get to a merge point in virtual SSA web, then go through all
+         non-dom virtual operands before handling next dom virtual operand. */
+      worklist.safe_push (dom_vuse);
+      dom_vuse = NULL_TREE;
+
+      do
+        {
+          tree vuse = worklist.pop ();
+          gimple *stmt = SSA_NAME_DEF_STMT (vuse);
+          auto_vec<tree, 6> append;
+
+          /* Non-dom virtual operand should not be the DEFAULT definition. */
+          gcc_assert (!SSA_NAME_IS_DEFAULT_DEF (vuse));
+
+          if (gimple_code (stmt) != GIMPLE_PHI)
+            {
+              if (stmt_may_clobber_ref_p_1 (stmt, &r))
+                {
+                  struct ipa_known_agg_contents_list *content =
+                             XALLOCA (struct ipa_known_agg_contents_list);
+
+                  if (!extract_mem_content (stmt, arg_base, check_ref, content))
+                    goto finish;
+
+                  add_to_agg_contents_list (&all_list, content);
+                }
+              append.safe_push (gimple_vuse (stmt));
+            }
+          else
+            {
+              for (unsigned i = 0; i < gimple_phi_num_args (stmt); ++i)
+                {
+                  tree phi_arg = gimple_phi_arg_def (stmt, i);
+
+                  if (SSA_NAME_IS_DEFAULT_DEF (phi_arg))
+                    goto finish;
+
+                  append.safe_push (phi_arg);
+                }
+            }
+
+          for (unsigned i = 0; i < append.length (); ++i)
+            {
+              if (visited.add (vuse = append[i]))
+                continue;
+
+              if (SSA_NAME_IS_DEFAULT_DEF (vuse)
+                  || strictly_dominated_by_ssa_p (call_bb, vuse))
+                {
+                  /* Found a new dom virtual operand, stop going further until
+                     all pending non-dom virtual operands are processed. */
+                  gcc_assert (!dom_vuse);
+                  dom_vuse = vuse;
+                }
+              else
+                worklist.safe_push (vuse);
+            }
+        } while (!worklist.is_empty ());
+
+      gcc_assert (dom_vuse);
+
+      /* It is asserted that unprocessed dom virtual operand should be in
+         non-visited state. */
+      visited.remove (dom_vuse);
+    }
+
+finish:
 
   /* Third stage just goes over the list and creates an appropriate vector of
-     ipa_agg_jf_item structures out of it, of sourse only if there are
+     ipa_agg_jf_item structures out of it, of course only if there are
      any known constants to begin with.  */
 
   if (const_count)
@@ -1691,6 +1834,7 @@  determine_locally_known_aggregate_parts (gcall *call, tree arg,
     }
 }
 
+
 /* Return the Ith param type of callee associated with call graph
    edge E.  */
 
@@ -1797,7 +1941,7 @@  ipa_set_jfunc_vr (ipa_jump_func *jf, enum value_range_kind type,
   jf->m_vr = ipa_get_value_range (type, min, max);
 }
 
-/* Assign to JF a pointer to a value_range just liek TMP but either fetch a
+/* Assign to JF a pointer to a value_range just like TMP but either fetch a
    copy from ipa_vr_hash_table or allocate a new on in GC memory.  */
 
 static void
@@ -1978,7 +2122,7 @@  ipa_compute_jump_functions_for_edge (struct ipa_func_body_info *fbi,
 	      || !ipa_get_jf_ancestor_agg_preserved (jfunc))
 	  && (AGGREGATE_TYPE_P (TREE_TYPE (arg))
 	      || POINTER_TYPE_P (param_type)))
-	determine_locally_known_aggregate_parts (call, arg, param_type, jfunc);
+	determine_known_aggregate_parts (call, arg, param_type, jfunc);
     }
   if (!useful_context)
     vec_free (args->polymorphic_call_contexts);
diff --git a/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c b/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c
new file mode 100644
index 00000000000..131c4e265bb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c
@@ -0,0 +1,78 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-ipa-cp-details -fno-inline" } */
+
+int data1;
+
+int callee1(int *v)
+{
+  if (*v < 2)
+    return 0;
+  else 
+    {
+      int t = data1;
+
+      data1 = *v;
+      *v = t;
+
+      return 1;
+    }
+}
+
+int __attribute((pure)) callee2(int *v)
+{
+  if (*v < 2)
+    return 0;
+  else 
+    {
+      data1 = v[0] + v[2];
+
+      return 1;
+    }
+}
+
+int caller1(int c, int *r)
+{
+  int a = 1;
+
+  if (c)
+    return callee1(&a);
+  else
+    {
+      *r = 2;
+      return callee1(r);
+    }
+}
+
+int data2[200];
+int data3;
+
+int __attribute((const)) gen_cond(int);
+
+int caller2(void)
+{
+  int i, j;
+  int sum = 0;
+  int a[8];
+
+  a[0] = 3;
+  for (i = 0; i < 100; i++)
+    {
+      if (gen_cond (i))
+        continue;
+
+      a[2] = 4;
+      for (j = 0; j < 100; j++)
+        {
+          data2[i + j] = (i ^ j) + data3;
+
+          sum += callee2(a);
+        }
+    }
+
+  return sum;
+}
+
+/* { dg-final { scan-ipa-dump-times "offset: 0, cst: 1" 1 "cp" } } */
+/* { dg-final { scan-ipa-dump-times "offset: 0, cst: 2" 1 "cp" } } */
+/* { dg-final { scan-ipa-dump-times "offset: 0, cst: 3" 1 "cp" } } */
+/* { dg-final { scan-ipa-dump-times "offset: 64, cst: 4" 1 "cp" } } */