diff mbox series

Fix PR 100944 - Array boundary misscalculation

Message ID 20210613170007.2868159-1-giuliano.belinassi@usp.br
State New
Headers show
Series Fix PR 100944 - Array boundary misscalculation | expand

Commit Message

Giuliano Belinassi June 13, 2021, 5 p.m. UTC
This patch proposes a fix to PR 100944 by improving the array boundary
computation when the flexible array has no clear constructor: if no
constructor were found in the input code, we compute the size of the
array as:

  offset(array begin) - offset(next element in RECORD_TYPE)

If no next element is found in the RECORD, simply fall back to:

  offset(array begin) - offset(RECORD_TYPE ends)

We avoid doing this calculation if the RECORD_TYPE is actually an
UNION_TYPE, once things get very complicated in this case.

gcc/ChangeLog:
2021-13-08  Giuliano Belinassi  <giuliano.belinassi@usp.br>

	PR middle-end/100944
	* tree-dfa.c (get_ref_base_and_unit_offset): Add option to compute size
	of next field.
	* (get_ref_base_and_unit_offset_1): Same as above.
	* tree-dfa.h (get_ref_base_and_unit_offset): Same as above.
	(get_ref_base_and_unit_offset_1): Same as above.
	* tree.c (least_common_record_1): New.
	(least_common_record): New.
	(component_ref_size): Improve array size calculation.

gcc/testsuite/ChangeLog:
2021-13-08  Giuliano Belinassi  <giuliano.belinassi@usp.br>

	PR middle-end/100944
	* gcc.dg/Wzero-length-array-bounds.c: Update diagnostic.
	* gcc.dg/Warray-bounds-71.c: New test.
---
 gcc/testsuite/gcc.dg/Warray-bounds-71.c       |  42 +++++++
 .../gcc.dg/Wzero-length-array-bounds.c        |  18 +--
 gcc/tree-dfa.c                                |  31 ++++-
 gcc/tree-dfa.h                                |   6 +-
 gcc/tree.c                                    | 115 ++++++++++++++----
 5 files changed, 172 insertions(+), 40 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/Warray-bounds-71.c

Comments

Richard Biener June 14, 2021, 6:36 a.m. UTC | #1
On Sun, Jun 13, 2021 at 7:00 PM Giuliano Belinassi via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> This patch proposes a fix to PR 100944 by improving the array boundary
> computation when the flexible array has no clear constructor: if no
> constructor were found in the input code, we compute the size of the
> array as:
>
>   offset(array begin) - offset(next element in RECORD_TYPE)
>
> If no next element is found in the RECORD, simply fall back to:
>
>   offset(array begin) - offset(RECORD_TYPE ends)
>
> We avoid doing this calculation if the RECORD_TYPE is actually an
> UNION_TYPE, once things get very complicated in this case.

I'm looking at Wzero-length-array-bounds.c and wonder why the patch
makes a difference here.

struct X { int a[0]; int b, c; };

a is clearly not a flex-array?

The code in component_ref_size is truly weird.  IMHO using
get_ref_base_and_unit_offset for computing the size of a flexarray
member is the wrong utility - instead a much better choice
(also for consistency) would be array_at_struct_end_p itself,
for example by giving it an optional HOST_WIDE_INT *size
argument.

Richard.

> gcc/ChangeLog:
> 2021-13-08  Giuliano Belinassi  <giuliano.belinassi@usp.br>
>
>         PR middle-end/100944
>         * tree-dfa.c (get_ref_base_and_unit_offset): Add option to compute size
>         of next field.
>         * (get_ref_base_and_unit_offset_1): Same as above.
>         * tree-dfa.h (get_ref_base_and_unit_offset): Same as above.
>         (get_ref_base_and_unit_offset_1): Same as above.
>         * tree.c (least_common_record_1): New.
>         (least_common_record): New.
>         (component_ref_size): Improve array size calculation.
>
> gcc/testsuite/ChangeLog:
> 2021-13-08  Giuliano Belinassi  <giuliano.belinassi@usp.br>
>
>         PR middle-end/100944
>         * gcc.dg/Wzero-length-array-bounds.c: Update diagnostic.
>         * gcc.dg/Warray-bounds-71.c: New test.
> ---
>  gcc/testsuite/gcc.dg/Warray-bounds-71.c       |  42 +++++++
>  .../gcc.dg/Wzero-length-array-bounds.c        |  18 +--
>  gcc/tree-dfa.c                                |  31 ++++-
>  gcc/tree-dfa.h                                |   6 +-
>  gcc/tree.c                                    | 115 ++++++++++++++----
>  5 files changed, 172 insertions(+), 40 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/Warray-bounds-71.c
>
> diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-71.c b/gcc/testsuite/gcc.dg/Warray-bounds-71.c
> new file mode 100644
> index 00000000000..cc5b083bc77
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/Warray-bounds-71.c
> @@ -0,0 +1,42 @@
> +/* PR middle-end/100944 - missing -Warray-bounds accessing a flexible array
> +   member of a nested struct
> +   { dg-do compile }
> +   { dg-options "-O2 -Wall" } */
> +
> +struct A0
> +{
> +  int i, a[0];
> +};
> +
> +struct B0
> +{
> +  struct A0 a;
> +  long x;
> +} b0;
> +
> +void f0 (int i)
> +{
> +  long t = b0.x;
> +  b0.a.a[i] = 0;    // { dg-warning "\\\[-Warray-bounds" }
> +  if (t != b0.x)    // folded to false
> +    __builtin_abort ();
> +}
> +
> +struct Ax
> +{
> +  int i, a[];
> +};
> +
> +struct Bx
> +{
> +  struct Ax a;
> +  long x;
> +} bx;
> +
> +void fx (int i)
> +{
> +  long t = bx.x;
> +  bx.a.a[i] = 0;    // { dg-warning "\\\[-Warray-bounds" }
> +  if (t != bx.x)    // folded to false
> +    __builtin_abort ();
> +}
> diff --git a/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c b/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c
> index 8e880d92dea..117b30ff294 100644
> --- a/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c
> +++ b/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c
> @@ -68,21 +68,21 @@ extern struct Y y;
>
>  void access_to_member (int i)
>  {
> -  y.a[0].a[0] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
> -  y.a[0].a[1] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
> -  y.a[0].a[2] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
> +  y.a[0].a[0] = 0;      // { dg-warning "\\\[-Warray-bounds" }
> +  y.a[0].a[1] = 0;      // { dg-warning "\\\[-Warray-bounds" }
> +  y.a[0].a[2] = 0;      // { dg-warning "\\\[-Warray-bounds" }
>    sink (a);
>
> -  y.a[1].a[0] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
> -  y.a[1].a[1] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
> +  y.a[1].a[0] = 0;      // { dg-warning "\\\[-Warray-bounds" }
> +  y.a[1].a[1] = 0;      // { dg-warning "\\\[-Warray-bounds" }
>    /* Similar to the array case above, accesses to a subsequent member
>       of the "parent" struct seem like a more severe problem than those
>       to the next member of the same struct.  */
> -  y.a[1].a[2] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
> +  y.a[1].a[2] = 0;      // { dg-warning "\\\[-Warray-bounds" }
>    sink (a);
>
> -  y.b.a[0] = 0;         // { dg-warning "\\\[-Wzero-length-bounds" }
> -  y.b.a[1] = 0;         // { dg-warning "\\\[-Wzero-length-bounds" }
> -  y.b.a[2] = 0;         // { dg-warning "\\\[-Wzero-length-bounds" }
> +  y.b.a[0] = 0;         // { dg-warning "\\\[-Warray-bounds" }
> +  y.b.a[1] = 0;         // { dg-warning "\\\[-Warray-bounds" }
> +  y.b.a[2] = 0;         // { dg-warning "\\\[-Warray-bounds" }
>    y.b.a[3] = 0;         // { dg-warning "\\\[-Warray-bounds" }
>  }
> diff --git a/gcc/tree-dfa.c b/gcc/tree-dfa.c
> index 1d20de0c400..dc3c15f11d5 100644
> --- a/gcc/tree-dfa.c
> +++ b/gcc/tree-dfa.c
> @@ -772,9 +772,11 @@ get_ref_base_and_extent_hwi (tree exp, HOST_WIDE_INT *poffset,
>
>  tree
>  get_addr_base_and_unit_offset_1 (tree exp, poly_int64_pod *poffset,
> -                                tree (*valueize) (tree))
> +                                tree (*valueize) (tree),
> +                                bool of_next_component /* = false.  */)
>  {
>    poly_int64 byte_offset = 0;
> +  tree next_field = NULL_TREE;
>
>    /* Compute cumulative byte-offset for nested component-refs and array-refs,
>       and find the ultimate containing object.  */
> @@ -797,11 +799,27 @@ get_addr_base_and_unit_offset_1 (tree exp, poly_int64_pod *poffset,
>         case COMPONENT_REF:
>           {
>             tree field = TREE_OPERAND (exp, 1);
> -           tree this_offset = component_ref_field_offset (exp);
> +           tree this_offset;
> +
>             poly_int64 hthis_offset;
>
> +           if (of_next_component && !next_field)
> +             {
> +               /* We are looking for the next component of the record.  */
> +               next_field = TREE_CHAIN (field);
> +               if (!next_field)
> +                 break;
> +
> +               /* We found a next component.  Flag that we found it and
> +                  update the target field.  */
> +               field = next_field;
> +             }
> +
> +           this_offset = component_ref_field_offset (exp);
> +
>             if (!this_offset
>                 || !poly_int_tree_p (this_offset, &hthis_offset)
> +               || TREE_CODE (field) != FIELD_DECL
>                 || (TREE_INT_CST_LOW (DECL_FIELD_BIT_OFFSET (field))
>                     % BITS_PER_UNIT))
>               return NULL_TREE;
> @@ -904,6 +922,9 @@ get_addr_base_and_unit_offset_1 (tree exp, poly_int64_pod *poffset,
>  done:
>
>    *poffset = byte_offset;
> +
> +  if (of_next_component)
> +    return next_field;
>    return exp;
>  }
>
> @@ -913,9 +934,11 @@ done:
>     is not BITS_PER_UNIT-aligned.  */
>
>  tree
> -get_addr_base_and_unit_offset (tree exp, poly_int64_pod *poffset)
> +get_addr_base_and_unit_offset (tree exp, poly_int64_pod *poffset, bool
> +                              of_next_component /* = false.  */)
>  {
> -  return get_addr_base_and_unit_offset_1 (exp, poffset, NULL);
> +  return get_addr_base_and_unit_offset_1 (exp, poffset, NULL,
> +                                         of_next_component);
>  }
>
>  /* Returns true if STMT references an SSA_NAME that has
> diff --git a/gcc/tree-dfa.h b/gcc/tree-dfa.h
> index b1457ab065c..94e44d9c3f6 100644
> --- a/gcc/tree-dfa.h
> +++ b/gcc/tree-dfa.h
> @@ -34,8 +34,10 @@ extern tree get_ref_base_and_extent (tree, poly_int64_pod *, poly_int64_pod *,
>  extern tree get_ref_base_and_extent_hwi (tree, HOST_WIDE_INT *,
>                                          HOST_WIDE_INT *, bool *);
>  extern tree get_addr_base_and_unit_offset_1 (tree, poly_int64_pod *,
> -                                            tree (*) (tree));
> -extern tree get_addr_base_and_unit_offset (tree, poly_int64_pod *);
> +                                            tree (*) (tree),
> +                                            bool of_next_component = false);
> +extern tree get_addr_base_and_unit_offset (tree, poly_int64_pod *,
> +                                          bool = false);
>  extern bool stmt_references_abnormal_ssa_name (gimple *);
>  extern void replace_abnormal_ssa_names (gimple *);
>  extern void dump_enumerated_decls (FILE *, dump_flags_t);
> diff --git a/gcc/tree.c b/gcc/tree.c
> index 1aa6e557a04..45d7fa2ae92 100644
> --- a/gcc/tree.c
> +++ b/gcc/tree.c
> @@ -12649,6 +12649,47 @@ get_initializer_for (tree init, tree decl)
>    return NULL_TREE;
>  }
>
> +static int
> +least_common_record_1 (tree basetype, tree field1, tree field2,
> +                      tree *least_basetype)
> +{
> +  int ret = 0;
> +
> +  for (tree fld = TYPE_FIELDS (basetype); fld; fld = TREE_CHAIN (fld))
> +    {
> +      if (fld == field1 || fld == field2)
> +       ret++;
> +
> +      if (TREE_CODE (TREE_TYPE (fld)) == UNION_TYPE
> +         || TREE_CODE (TREE_TYPE (fld)) == RECORD_TYPE)
> +       ret += least_common_record_1 (TREE_TYPE (fld), field1, field2,
> +                                     least_basetype);
> +    }
> +
> +  if (ret == 2)
> +    {
> +      *least_basetype = basetype;
> +      ret++; /* Avoid getting in this block again if a common basetype were
> +               found.  */
> +    }
> +
> +  return ret;
> +}
> +
> +/* Find the least common RECORD type common to two FIELDS from base.  */
> +static tree
> +least_common_record (tree basetype, tree field1, tree field2)
> +{
> +  if (!(TREE_CODE (basetype) == RECORD_TYPE
> +       || TREE_CODE (basetype) == UNION_TYPE))
> +    return NULL_TREE;
> +
> +  tree least_basetype = NULL_TREE;
> +  least_common_record_1 (basetype, field1, field2, &least_basetype);
> +
> +  return least_basetype;
> +}
> +
>  /* Determines the size of the member referenced by the COMPONENT_REF
>     REF, using its initializer expression if necessary in order to
>     determine the size of an initialized flexible array member.
> @@ -12768,28 +12809,30 @@ component_ref_size (tree ref, special_array_member *sam /* = NULL */)
>    memsize = NULL_TREE;
>
>    if (typematch)
> -    /* MEMBER is a true flexible array member.  Compute its size from
> -       the initializer of the BASE object if it has one.  */
> -    if (tree init = DECL_P (base) ? DECL_INITIAL (base) : NULL_TREE)
> -      if (init != error_mark_node)
> -       {
> -         init = get_initializer_for (init, member);
> -         if (init)
> -           {
> -             memsize = TYPE_SIZE_UNIT (TREE_TYPE (init));
> -             if (tree refsize = TYPE_SIZE_UNIT (argtype))
> -               {
> -                 /* Use the larger of the initializer size and the tail
> -                    padding in the enclosing struct.  */
> -                 poly_int64 rsz = tree_to_poly_int64 (refsize);
> -                 rsz -= baseoff;
> -                 if (known_lt (tree_to_poly_int64 (memsize), rsz))
> -                   memsize = wide_int_to_tree (TREE_TYPE (memsize), rsz);
> -               }
> -
> -             baseoff = 0;
> -           }
> -       }
> +    {
> +      /* MEMBER is a true flexible array member.  Compute its size from
> +        the initializer of the BASE object if it has one.  */
> +      if (tree init = DECL_P (base) ? DECL_INITIAL (base) : NULL_TREE)
> +       if (init != error_mark_node)
> +         {
> +           init = get_initializer_for (init, member);
> +           if (init)
> +             {
> +               memsize = TYPE_SIZE_UNIT (TREE_TYPE (init));
> +               if (tree refsize = TYPE_SIZE_UNIT (argtype))
> +                 {
> +                   /* Use the larger of the initializer size and the tail
> +                      padding in the enclosing struct.  */
> +                   poly_int64 rsz = tree_to_poly_int64 (refsize);
> +                   rsz -= baseoff;
> +                   if (known_lt (tree_to_poly_int64 (memsize), rsz))
> +                     memsize = wide_int_to_tree (TREE_TYPE (memsize), rsz);
> +                 }
> +
> +               baseoff = 0;
> +             }
> +         }
> +    }
>
>    if (!memsize)
>      {
> @@ -12810,9 +12853,31 @@ component_ref_size (tree ref, special_array_member *sam /* = NULL */)
>           memsize = TYPE_SIZE_UNIT (bt);
>         }
>        else if (DECL_P (base))
> -       /* Use the size of the BASE object (possibly an array of some
> -          other type such as char used to store the struct).  */
> -       memsize = DECL_SIZE_UNIT (base);
> +       {
> +         /* It is possible that the flexible array has no constructor at all.
> +            In such cases, GCC will allocate the array in some weird way.
> +            Assume that the array size is the difference between the start
> +            address of the array and the next component, if it exists.  */
> +
> +         tree lcr;
> +
> +         poly_int64 baseoff2 = 0;
> +         tree next_field = get_addr_base_and_unit_offset (ref, &baseoff2,
> +                                                     /* next_elmnt =  */ true);
> +         if (next_field
> +             && (lcr = least_common_record (basetype, member, next_field))
> +             && TREE_CODE (lcr) == RECORD_TYPE
> +             && known_ge (baseoff2, baseoff))
> +           {
> +               poly_int64 size =  baseoff2 - baseoff;
> +               memsize = wide_int_to_tree (TREE_TYPE (DECL_SIZE_UNIT (base)),
> +                                           size);
> +           }
> +         else
> +           /* Use the size of the BASE object (possibly an array of some
> +              other type such as char used to store the struct).  */
> +           memsize = DECL_SIZE_UNIT (base);
> +       }
>        else
>         return NULL_TREE;
>      }
> --
> 2.32.0
>
Martin Sebor June 14, 2021, 5:32 p.m. UTC | #2
On 6/14/21 12:36 AM, Richard Biener via Gcc-patches wrote:
> On Sun, Jun 13, 2021 at 7:00 PM Giuliano Belinassi via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
>>
>> This patch proposes a fix to PR 100944 by improving the array boundary
>> computation when the flexible array has no clear constructor: if no
>> constructor were found in the input code, we compute the size of the
>> array as:
>>
>>    offset(array begin) - offset(next element in RECORD_TYPE)
>>
>> If no next element is found in the RECORD, simply fall back to:
>>
>>    offset(array begin) - offset(RECORD_TYPE ends)
>>
>> We avoid doing this calculation if the RECORD_TYPE is actually an
>> UNION_TYPE, once things get very complicated in this case.
> 
> I'm looking at Wzero-length-array-bounds.c and wonder why the patch
> makes a difference here.
> 
> struct X { int a[0]; int b, c; };
> 
> a is clearly not a flex-array?

The warning issued here shouldn't change.  -Wzero-length-bounds is
documented to

   Warn about Accesses to elements of zero-length array members that
   might overlap other members of the same object.

By "the object" the text means the complete object (if one can be
determined), not necessarily just the subobject of the enclosing
struct.  This is contrast to accesses to zero-length arrays that
are out of the bounds of the complete object.  This distinction
was made to help the kernel transition to the new warning (I'm
not sure if they're done with the cleanup yet).

> 
> The code in component_ref_size is truly weird.  IMHO using
> get_ref_base_and_unit_offset for computing the size of a flexarray
> member is the wrong utility - instead a much better choice
> (also for consistency) would be array_at_struct_end_p itself,
> for example by giving it an optional HOST_WIDE_INT *size
> argument.

array_at_struct_end_p() is permissive and treats all trailing arrays
as effectively unbounded if it doesn't know the enclosing object.
-Warray-bounds and all other access warnings are more conservative
and treat only flexible array members and trailing arrays of zero
length as unbounded (except in complete objects where they consider
the initializer for the former).

Martin

> 
> Richard.
> 
>> gcc/ChangeLog:
>> 2021-13-08  Giuliano Belinassi  <giuliano.belinassi@usp.br>
>>
>>          PR middle-end/100944
>>          * tree-dfa.c (get_ref_base_and_unit_offset): Add option to compute size
>>          of next field.
>>          * (get_ref_base_and_unit_offset_1): Same as above.
>>          * tree-dfa.h (get_ref_base_and_unit_offset): Same as above.
>>          (get_ref_base_and_unit_offset_1): Same as above.
>>          * tree.c (least_common_record_1): New.
>>          (least_common_record): New.
>>          (component_ref_size): Improve array size calculation.
>>
>> gcc/testsuite/ChangeLog:
>> 2021-13-08  Giuliano Belinassi  <giuliano.belinassi@usp.br>
>>
>>          PR middle-end/100944
>>          * gcc.dg/Wzero-length-array-bounds.c: Update diagnostic.
>>          * gcc.dg/Warray-bounds-71.c: New test.
>> ---
>>   gcc/testsuite/gcc.dg/Warray-bounds-71.c       |  42 +++++++
>>   .../gcc.dg/Wzero-length-array-bounds.c        |  18 +--
>>   gcc/tree-dfa.c                                |  31 ++++-
>>   gcc/tree-dfa.h                                |   6 +-
>>   gcc/tree.c                                    | 115 ++++++++++++++----
>>   5 files changed, 172 insertions(+), 40 deletions(-)
>>   create mode 100644 gcc/testsuite/gcc.dg/Warray-bounds-71.c
>>
>> diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-71.c b/gcc/testsuite/gcc.dg/Warray-bounds-71.c
>> new file mode 100644
>> index 00000000000..cc5b083bc77
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/Warray-bounds-71.c
>> @@ -0,0 +1,42 @@
>> +/* PR middle-end/100944 - missing -Warray-bounds accessing a flexible array
>> +   member of a nested struct
>> +   { dg-do compile }
>> +   { dg-options "-O2 -Wall" } */
>> +
>> +struct A0
>> +{
>> +  int i, a[0];
>> +};
>> +
>> +struct B0
>> +{
>> +  struct A0 a;
>> +  long x;
>> +} b0;
>> +
>> +void f0 (int i)
>> +{
>> +  long t = b0.x;
>> +  b0.a.a[i] = 0;    // { dg-warning "\\\[-Warray-bounds" }
>> +  if (t != b0.x)    // folded to false
>> +    __builtin_abort ();
>> +}
>> +
>> +struct Ax
>> +{
>> +  int i, a[];
>> +};
>> +
>> +struct Bx
>> +{
>> +  struct Ax a;
>> +  long x;
>> +} bx;
>> +
>> +void fx (int i)
>> +{
>> +  long t = bx.x;
>> +  bx.a.a[i] = 0;    // { dg-warning "\\\[-Warray-bounds" }
>> +  if (t != bx.x)    // folded to false
>> +    __builtin_abort ();
>> +}
>> diff --git a/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c b/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c
>> index 8e880d92dea..117b30ff294 100644
>> --- a/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c
>> +++ b/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c
>> @@ -68,21 +68,21 @@ extern struct Y y;
>>
>>   void access_to_member (int i)
>>   {
>> -  y.a[0].a[0] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
>> -  y.a[0].a[1] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
>> -  y.a[0].a[2] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
>> +  y.a[0].a[0] = 0;      // { dg-warning "\\\[-Warray-bounds" }
>> +  y.a[0].a[1] = 0;      // { dg-warning "\\\[-Warray-bounds" }
>> +  y.a[0].a[2] = 0;      // { dg-warning "\\\[-Warray-bounds" }
>>     sink (a);
>>
>> -  y.a[1].a[0] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
>> -  y.a[1].a[1] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
>> +  y.a[1].a[0] = 0;      // { dg-warning "\\\[-Warray-bounds" }
>> +  y.a[1].a[1] = 0;      // { dg-warning "\\\[-Warray-bounds" }
>>     /* Similar to the array case above, accesses to a subsequent member
>>        of the "parent" struct seem like a more severe problem than those
>>        to the next member of the same struct.  */
>> -  y.a[1].a[2] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
>> +  y.a[1].a[2] = 0;      // { dg-warning "\\\[-Warray-bounds" }
>>     sink (a);
>>
>> -  y.b.a[0] = 0;         // { dg-warning "\\\[-Wzero-length-bounds" }
>> -  y.b.a[1] = 0;         // { dg-warning "\\\[-Wzero-length-bounds" }
>> -  y.b.a[2] = 0;         // { dg-warning "\\\[-Wzero-length-bounds" }
>> +  y.b.a[0] = 0;         // { dg-warning "\\\[-Warray-bounds" }
>> +  y.b.a[1] = 0;         // { dg-warning "\\\[-Warray-bounds" }
>> +  y.b.a[2] = 0;         // { dg-warning "\\\[-Warray-bounds" }
>>     y.b.a[3] = 0;         // { dg-warning "\\\[-Warray-bounds" }
>>   }
>> diff --git a/gcc/tree-dfa.c b/gcc/tree-dfa.c
>> index 1d20de0c400..dc3c15f11d5 100644
>> --- a/gcc/tree-dfa.c
>> +++ b/gcc/tree-dfa.c
>> @@ -772,9 +772,11 @@ get_ref_base_and_extent_hwi (tree exp, HOST_WIDE_INT *poffset,
>>
>>   tree
>>   get_addr_base_and_unit_offset_1 (tree exp, poly_int64_pod *poffset,
>> -                                tree (*valueize) (tree))
>> +                                tree (*valueize) (tree),
>> +                                bool of_next_component /* = false.  */)
>>   {
>>     poly_int64 byte_offset = 0;
>> +  tree next_field = NULL_TREE;
>>
>>     /* Compute cumulative byte-offset for nested component-refs and array-refs,
>>        and find the ultimate containing object.  */
>> @@ -797,11 +799,27 @@ get_addr_base_and_unit_offset_1 (tree exp, poly_int64_pod *poffset,
>>          case COMPONENT_REF:
>>            {
>>              tree field = TREE_OPERAND (exp, 1);
>> -           tree this_offset = component_ref_field_offset (exp);
>> +           tree this_offset;
>> +
>>              poly_int64 hthis_offset;
>>
>> +           if (of_next_component && !next_field)
>> +             {
>> +               /* We are looking for the next component of the record.  */
>> +               next_field = TREE_CHAIN (field);
>> +               if (!next_field)
>> +                 break;
>> +
>> +               /* We found a next component.  Flag that we found it and
>> +                  update the target field.  */
>> +               field = next_field;
>> +             }
>> +
>> +           this_offset = component_ref_field_offset (exp);
>> +
>>              if (!this_offset
>>                  || !poly_int_tree_p (this_offset, &hthis_offset)
>> +               || TREE_CODE (field) != FIELD_DECL
>>                  || (TREE_INT_CST_LOW (DECL_FIELD_BIT_OFFSET (field))
>>                      % BITS_PER_UNIT))
>>                return NULL_TREE;
>> @@ -904,6 +922,9 @@ get_addr_base_and_unit_offset_1 (tree exp, poly_int64_pod *poffset,
>>   done:
>>
>>     *poffset = byte_offset;
>> +
>> +  if (of_next_component)
>> +    return next_field;
>>     return exp;
>>   }
>>
>> @@ -913,9 +934,11 @@ done:
>>      is not BITS_PER_UNIT-aligned.  */
>>
>>   tree
>> -get_addr_base_and_unit_offset (tree exp, poly_int64_pod *poffset)
>> +get_addr_base_and_unit_offset (tree exp, poly_int64_pod *poffset, bool
>> +                              of_next_component /* = false.  */)
>>   {
>> -  return get_addr_base_and_unit_offset_1 (exp, poffset, NULL);
>> +  return get_addr_base_and_unit_offset_1 (exp, poffset, NULL,
>> +                                         of_next_component);
>>   }
>>
>>   /* Returns true if STMT references an SSA_NAME that has
>> diff --git a/gcc/tree-dfa.h b/gcc/tree-dfa.h
>> index b1457ab065c..94e44d9c3f6 100644
>> --- a/gcc/tree-dfa.h
>> +++ b/gcc/tree-dfa.h
>> @@ -34,8 +34,10 @@ extern tree get_ref_base_and_extent (tree, poly_int64_pod *, poly_int64_pod *,
>>   extern tree get_ref_base_and_extent_hwi (tree, HOST_WIDE_INT *,
>>                                           HOST_WIDE_INT *, bool *);
>>   extern tree get_addr_base_and_unit_offset_1 (tree, poly_int64_pod *,
>> -                                            tree (*) (tree));
>> -extern tree get_addr_base_and_unit_offset (tree, poly_int64_pod *);
>> +                                            tree (*) (tree),
>> +                                            bool of_next_component = false);
>> +extern tree get_addr_base_and_unit_offset (tree, poly_int64_pod *,
>> +                                          bool = false);
>>   extern bool stmt_references_abnormal_ssa_name (gimple *);
>>   extern void replace_abnormal_ssa_names (gimple *);
>>   extern void dump_enumerated_decls (FILE *, dump_flags_t);
>> diff --git a/gcc/tree.c b/gcc/tree.c
>> index 1aa6e557a04..45d7fa2ae92 100644
>> --- a/gcc/tree.c
>> +++ b/gcc/tree.c
>> @@ -12649,6 +12649,47 @@ get_initializer_for (tree init, tree decl)
>>     return NULL_TREE;
>>   }
>>
>> +static int
>> +least_common_record_1 (tree basetype, tree field1, tree field2,
>> +                      tree *least_basetype)
>> +{
>> +  int ret = 0;
>> +
>> +  for (tree fld = TYPE_FIELDS (basetype); fld; fld = TREE_CHAIN (fld))
>> +    {
>> +      if (fld == field1 || fld == field2)
>> +       ret++;
>> +
>> +      if (TREE_CODE (TREE_TYPE (fld)) == UNION_TYPE
>> +         || TREE_CODE (TREE_TYPE (fld)) == RECORD_TYPE)
>> +       ret += least_common_record_1 (TREE_TYPE (fld), field1, field2,
>> +                                     least_basetype);
>> +    }
>> +
>> +  if (ret == 2)
>> +    {
>> +      *least_basetype = basetype;
>> +      ret++; /* Avoid getting in this block again if a common basetype were
>> +               found.  */
>> +    }
>> +
>> +  return ret;
>> +}
>> +
>> +/* Find the least common RECORD type common to two FIELDS from base.  */
>> +static tree
>> +least_common_record (tree basetype, tree field1, tree field2)
>> +{
>> +  if (!(TREE_CODE (basetype) == RECORD_TYPE
>> +       || TREE_CODE (basetype) == UNION_TYPE))
>> +    return NULL_TREE;
>> +
>> +  tree least_basetype = NULL_TREE;
>> +  least_common_record_1 (basetype, field1, field2, &least_basetype);
>> +
>> +  return least_basetype;
>> +}
>> +
>>   /* Determines the size of the member referenced by the COMPONENT_REF
>>      REF, using its initializer expression if necessary in order to
>>      determine the size of an initialized flexible array member.
>> @@ -12768,28 +12809,30 @@ component_ref_size (tree ref, special_array_member *sam /* = NULL */)
>>     memsize = NULL_TREE;
>>
>>     if (typematch)
>> -    /* MEMBER is a true flexible array member.  Compute its size from
>> -       the initializer of the BASE object if it has one.  */
>> -    if (tree init = DECL_P (base) ? DECL_INITIAL (base) : NULL_TREE)
>> -      if (init != error_mark_node)
>> -       {
>> -         init = get_initializer_for (init, member);
>> -         if (init)
>> -           {
>> -             memsize = TYPE_SIZE_UNIT (TREE_TYPE (init));
>> -             if (tree refsize = TYPE_SIZE_UNIT (argtype))
>> -               {
>> -                 /* Use the larger of the initializer size and the tail
>> -                    padding in the enclosing struct.  */
>> -                 poly_int64 rsz = tree_to_poly_int64 (refsize);
>> -                 rsz -= baseoff;
>> -                 if (known_lt (tree_to_poly_int64 (memsize), rsz))
>> -                   memsize = wide_int_to_tree (TREE_TYPE (memsize), rsz);
>> -               }
>> -
>> -             baseoff = 0;
>> -           }
>> -       }
>> +    {
>> +      /* MEMBER is a true flexible array member.  Compute its size from
>> +        the initializer of the BASE object if it has one.  */
>> +      if (tree init = DECL_P (base) ? DECL_INITIAL (base) : NULL_TREE)
>> +       if (init != error_mark_node)
>> +         {
>> +           init = get_initializer_for (init, member);
>> +           if (init)
>> +             {
>> +               memsize = TYPE_SIZE_UNIT (TREE_TYPE (init));
>> +               if (tree refsize = TYPE_SIZE_UNIT (argtype))
>> +                 {
>> +                   /* Use the larger of the initializer size and the tail
>> +                      padding in the enclosing struct.  */
>> +                   poly_int64 rsz = tree_to_poly_int64 (refsize);
>> +                   rsz -= baseoff;
>> +                   if (known_lt (tree_to_poly_int64 (memsize), rsz))
>> +                     memsize = wide_int_to_tree (TREE_TYPE (memsize), rsz);
>> +                 }
>> +
>> +               baseoff = 0;
>> +             }
>> +         }
>> +    }
>>
>>     if (!memsize)
>>       {
>> @@ -12810,9 +12853,31 @@ component_ref_size (tree ref, special_array_member *sam /* = NULL */)
>>            memsize = TYPE_SIZE_UNIT (bt);
>>          }
>>         else if (DECL_P (base))
>> -       /* Use the size of the BASE object (possibly an array of some
>> -          other type such as char used to store the struct).  */
>> -       memsize = DECL_SIZE_UNIT (base);
>> +       {
>> +         /* It is possible that the flexible array has no constructor at all.
>> +            In such cases, GCC will allocate the array in some weird way.
>> +            Assume that the array size is the difference between the start
>> +            address of the array and the next component, if it exists.  */
>> +
>> +         tree lcr;
>> +
>> +         poly_int64 baseoff2 = 0;
>> +         tree next_field = get_addr_base_and_unit_offset (ref, &baseoff2,
>> +                                                     /* next_elmnt =  */ true);
>> +         if (next_field
>> +             && (lcr = least_common_record (basetype, member, next_field))
>> +             && TREE_CODE (lcr) == RECORD_TYPE
>> +             && known_ge (baseoff2, baseoff))
>> +           {
>> +               poly_int64 size =  baseoff2 - baseoff;
>> +               memsize = wide_int_to_tree (TREE_TYPE (DECL_SIZE_UNIT (base)),
>> +                                           size);
>> +           }
>> +         else
>> +           /* Use the size of the BASE object (possibly an array of some
>> +              other type such as char used to store the struct).  */
>> +           memsize = DECL_SIZE_UNIT (base);
>> +       }
>>         else
>>          return NULL_TREE;
>>       }
>> --
>> 2.32.0
>>
Martin Sebor June 14, 2021, 9:55 p.m. UTC | #3
On 6/13/21 11:00 AM, Giuliano Belinassi wrote:
> This patch proposes a fix to PR 100944 by improving the array boundary

Thanks for the patch!

> computation when the flexible array has no clear constructor: if no
> constructor were found in the input code, we compute the size of the
> array as:
> 
>    offset(array begin) - offset(next element in RECORD_TYPE)
> 
> If no next element is found in the RECORD, simply fall back to:
> 
>    offset(array begin) - offset(RECORD_TYPE ends)

I'm not sure I understand the distinction between these expressions.
I'd expect them to give the same result: either zero when there's no
tail padding or negative when there is some.  Is there a case when
they don't?

I would think the fix to need to change just component_ref_size()
without having to touch other utility functions.  It seems to me
the problem is in the logic that deals with accesses to backing
buffers, as in:

   char a[sizeof (struct Bx)];
   struct Bx *p = (struct Bx*) a;
   p->a.a[i] = 1;

This should also be diagnosed.

The offset of the next member shouldn't matter, just the number of
elements in the array's initializer if it has one or the size of
the struct whose member is being accessed (in addition to the offset
of the member).

> 
> We avoid doing this calculation if the RECORD_TYPE is actually an
> UNION_TYPE, once things get very complicated in this case.
> 
> gcc/ChangeLog:
> 2021-13-08  Giuliano Belinassi  <giuliano.belinassi@usp.br>
> 
> 	PR middle-end/100944
> 	* tree-dfa.c (get_ref_base_and_unit_offset): Add option to compute size
> 	of next field.
> 	* (get_ref_base_and_unit_offset_1): Same as above.
> 	* tree-dfa.h (get_ref_base_and_unit_offset): Same as above.
> 	(get_ref_base_and_unit_offset_1): Same as above.
> 	* tree.c (least_common_record_1): New.
> 	(least_common_record): New.
> 	(component_ref_size): Improve array size calculation.
> 
> gcc/testsuite/ChangeLog:
> 2021-13-08  Giuliano Belinassi  <giuliano.belinassi@usp.br>
> 
> 	PR middle-end/100944
> 	* gcc.dg/Wzero-length-array-bounds.c: Update diagnostic.
> 	* gcc.dg/Warray-bounds-71.c: New test.
> ---
>   gcc/testsuite/gcc.dg/Warray-bounds-71.c       |  42 +++++++
>   .../gcc.dg/Wzero-length-array-bounds.c        |  18 +--
>   gcc/tree-dfa.c                                |  31 ++++-
>   gcc/tree-dfa.h                                |   6 +-
>   gcc/tree.c                                    | 115 ++++++++++++++----
>   5 files changed, 172 insertions(+), 40 deletions(-)
>   create mode 100644 gcc/testsuite/gcc.dg/Warray-bounds-71.c
> 
> diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-71.c b/gcc/testsuite/gcc.dg/Warray-bounds-71.c
> new file mode 100644
> index 00000000000..cc5b083bc77
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/Warray-bounds-71.c
> @@ -0,0 +1,42 @@
> +/* PR middle-end/100944 - missing -Warray-bounds accessing a flexible array
> +   member of a nested struct
> +   { dg-do compile }
> +   { dg-options "-O2 -Wall" } */
> +
> +struct A0
> +{
> +  int i, a[0];
> +};
> +
> +struct B0
> +{
> +  struct A0 a;
> +  long x;
> +} b0;
> +
> +void f0 (int i)
> +{
> +  long t = b0.x;
> +  b0.a.a[i] = 0;    // { dg-warning "\\\[-Warray-bounds" }
> +  if (t != b0.x)    // folded to false
> +    __builtin_abort ();
> +}
> +
> +struct Ax
> +{
> +  int i, a[];
> +};
> +
> +struct Bx
> +{
> +  struct Ax a;
> +  long x;
> +} bx;
> +
> +void fx (int i)
> +{
> +  long t = bx.x;
> +  bx.a.a[i] = 0;    // { dg-warning "\\\[-Warray-bounds" }
> +  if (t != bx.x)    // folded to false
> +    __builtin_abort ();
> +}
> diff --git a/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c b/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c
> index 8e880d92dea..117b30ff294 100644
> --- a/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c
> +++ b/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c
> @@ -68,21 +68,21 @@ extern struct Y y;
>   
>   void access_to_member (int i)
>   {
> -  y.a[0].a[0] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
> -  y.a[0].a[1] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
> -  y.a[0].a[2] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
> +  y.a[0].a[0] = 0;      // { dg-warning "\\\[-Warray-bounds" }
> +  y.a[0].a[1] = 0;      // { dg-warning "\\\[-Warray-bounds" }
> +  y.a[0].a[2] = 0;      // { dg-warning "\\\[-Warray-bounds" }

We do want to keep -Wzero-length-bounds for these accesses (as Richard
already noted).  I think same for those below (whenever there's overlap
between a zero-length array and a subsequent member).

>     sink (a);
>   
> -  y.a[1].a[0] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
> -  y.a[1].a[1] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
> +  y.a[1].a[0] = 0;      // { dg-warning "\\\[-Warray-bounds" }
> +  y.a[1].a[1] = 0;      // { dg-warning "\\\[-Warray-bounds" }
>     /* Similar to the array case above, accesses to a subsequent member
>        of the "parent" struct seem like a more severe problem than those
>        to the next member of the same struct.  */
> -  y.a[1].a[2] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
> +  y.a[1].a[2] = 0;      // { dg-warning "\\\[-Warray-bounds" }
>     sink (a);
>   
> -  y.b.a[0] = 0;         // { dg-warning "\\\[-Wzero-length-bounds" }
> -  y.b.a[1] = 0;         // { dg-warning "\\\[-Wzero-length-bounds" }
> -  y.b.a[2] = 0;         // { dg-warning "\\\[-Wzero-length-bounds" }
> +  y.b.a[0] = 0;         // { dg-warning "\\\[-Warray-bounds" }
> +  y.b.a[1] = 0;         // { dg-warning "\\\[-Warray-bounds" }
> +  y.b.a[2] = 0;         // { dg-warning "\\\[-Warray-bounds" }
>     y.b.a[3] = 0;         // { dg-warning "\\\[-Warray-bounds" }
>   }
> diff --git a/gcc/tree-dfa.c b/gcc/tree-dfa.c
> index 1d20de0c400..dc3c15f11d5 100644
> --- a/gcc/tree-dfa.c
> +++ b/gcc/tree-dfa.c
> @@ -772,9 +772,11 @@ get_ref_base_and_extent_hwi (tree exp, HOST_WIDE_INT *poffset,
>   
>   tree
>   get_addr_base_and_unit_offset_1 (tree exp, poly_int64_pod *poffset,
> -				 tree (*valueize) (tree))
> +				 tree (*valueize) (tree),
> +				 bool of_next_component /* = false.  */)
>   {
>     poly_int64 byte_offset = 0;
> +  tree next_field = NULL_TREE;
>   
>     /* Compute cumulative byte-offset for nested component-refs and array-refs,
>        and find the ultimate containing object.  */
> @@ -797,11 +799,27 @@ get_addr_base_and_unit_offset_1 (tree exp, poly_int64_pod *poffset,
>   	case COMPONENT_REF:
>   	  {
>   	    tree field = TREE_OPERAND (exp, 1);
> -	    tree this_offset = component_ref_field_offset (exp);
> +	    tree this_offset;
> +
>   	    poly_int64 hthis_offset;
>   
> +	    if (of_next_component && !next_field)
> +	      {
> +		/* We are looking for the next component of the record.  */
> +		next_field = TREE_CHAIN (field);
> +		if (!next_field)
> +		  break;
> +
> +		/* We found a next component.  Flag that we found it and
> +		   update the target field.  */
> +		field = next_field;
> +	      }
> +
> +	    this_offset = component_ref_field_offset (exp);
> +
>   	    if (!this_offset
>   		|| !poly_int_tree_p (this_offset, &hthis_offset)
> +		|| TREE_CODE (field) != FIELD_DECL
>   		|| (TREE_INT_CST_LOW (DECL_FIELD_BIT_OFFSET (field))
>   		    % BITS_PER_UNIT))
>   	      return NULL_TREE;
> @@ -904,6 +922,9 @@ get_addr_base_and_unit_offset_1 (tree exp, poly_int64_pod *poffset,
>   done:
>   
>     *poffset = byte_offset;
> +
> +  if (of_next_component)
> +    return next_field;
>     return exp;
>   }
>   
> @@ -913,9 +934,11 @@ done:
>      is not BITS_PER_UNIT-aligned.  */
>   
>   tree
> -get_addr_base_and_unit_offset (tree exp, poly_int64_pod *poffset)
> +get_addr_base_and_unit_offset (tree exp, poly_int64_pod *poffset, bool
> +			       of_next_component /* = false.  */)
>   {
> -  return get_addr_base_and_unit_offset_1 (exp, poffset, NULL);
> +  return get_addr_base_and_unit_offset_1 (exp, poffset, NULL,
> +					  of_next_component);
>   }
>   
>   /* Returns true if STMT references an SSA_NAME that has
> diff --git a/gcc/tree-dfa.h b/gcc/tree-dfa.h
> index b1457ab065c..94e44d9c3f6 100644
> --- a/gcc/tree-dfa.h
> +++ b/gcc/tree-dfa.h
> @@ -34,8 +34,10 @@ extern tree get_ref_base_and_extent (tree, poly_int64_pod *, poly_int64_pod *,
>   extern tree get_ref_base_and_extent_hwi (tree, HOST_WIDE_INT *,
>   					 HOST_WIDE_INT *, bool *);
>   extern tree get_addr_base_and_unit_offset_1 (tree, poly_int64_pod *,
> -					     tree (*) (tree));
> -extern tree get_addr_base_and_unit_offset (tree, poly_int64_pod *);
> +					     tree (*) (tree),
> +					     bool of_next_component = false);
> +extern tree get_addr_base_and_unit_offset (tree, poly_int64_pod *,
> +					   bool = false);
>   extern bool stmt_references_abnormal_ssa_name (gimple *);
>   extern void replace_abnormal_ssa_names (gimple *);
>   extern void dump_enumerated_decls (FILE *, dump_flags_t);
> diff --git a/gcc/tree.c b/gcc/tree.c
> index 1aa6e557a04..45d7fa2ae92 100644
> --- a/gcc/tree.c
> +++ b/gcc/tree.c
> @@ -12649,6 +12649,47 @@ get_initializer_for (tree init, tree decl)
>     return NULL_TREE;
>   }
>   
> +static int
> +least_common_record_1 (tree basetype, tree field1, tree field2,
> +		       tree *least_basetype)
> +{
> +  int ret = 0;
> +
> +  for (tree fld = TYPE_FIELDS (basetype); fld; fld = TREE_CHAIN (fld))
> +    {
> +      if (fld == field1 || fld == field2)
> +	ret++;
> +
> +      if (TREE_CODE (TREE_TYPE (fld)) == UNION_TYPE
> +	  || TREE_CODE (TREE_TYPE (fld)) == RECORD_TYPE)
> +	ret += least_common_record_1 (TREE_TYPE (fld), field1, field2,
> +				      least_basetype);
> +    }
> +
> +  if (ret == 2)
> +    {
> +      *least_basetype = basetype;
> +      ret++; /* Avoid getting in this block again if a common basetype were
> +		found.  */
> +    }
> +
> +  return ret;
> +}
> +
> +/* Find the least common RECORD type common to two FIELDS from base.  */
> +static tree
> +least_common_record (tree basetype, tree field1, tree field2)
> +{
> +  if (!(TREE_CODE (basetype) == RECORD_TYPE
> +	|| TREE_CODE (basetype) == UNION_TYPE))
> +    return NULL_TREE;
> +
> +  tree least_basetype = NULL_TREE;
> +  least_common_record_1 (basetype, field1, field2, &least_basetype);
> +
> +  return least_basetype;
> +}
> +
>   /* Determines the size of the member referenced by the COMPONENT_REF
>      REF, using its initializer expression if necessary in order to
>      determine the size of an initialized flexible array member.
> @@ -12768,28 +12809,30 @@ component_ref_size (tree ref, special_array_member *sam /* = NULL */)
>     memsize = NULL_TREE;
>   
>     if (typematch)
> -    /* MEMBER is a true flexible array member.  Compute its size from
> -       the initializer of the BASE object if it has one.  */
> -    if (tree init = DECL_P (base) ? DECL_INITIAL (base) : NULL_TREE)
> -      if (init != error_mark_node)
> -	{
> -	  init = get_initializer_for (init, member);
> -	  if (init)
> -	    {
> -	      memsize = TYPE_SIZE_UNIT (TREE_TYPE (init));
> -	      if (tree refsize = TYPE_SIZE_UNIT (argtype))
> -		{
> -		  /* Use the larger of the initializer size and the tail
> -		     padding in the enclosing struct.  */
> -		  poly_int64 rsz = tree_to_poly_int64 (refsize);
> -		  rsz -= baseoff;
> -		  if (known_lt (tree_to_poly_int64 (memsize), rsz))
> -		    memsize = wide_int_to_tree (TREE_TYPE (memsize), rsz);
> -		}
> -
> -	      baseoff = 0;
> -	    }
> -	}
> +    {
> +      /* MEMBER is a true flexible array member.  Compute its size from
> +	 the initializer of the BASE object if it has one.  */
> +      if (tree init = DECL_P (base) ? DECL_INITIAL (base) : NULL_TREE)
> +	if (init != error_mark_node)
> +	  {
> +	    init = get_initializer_for (init, member);
> +	    if (init)
> +	      {
> +		memsize = TYPE_SIZE_UNIT (TREE_TYPE (init));
> +		if (tree refsize = TYPE_SIZE_UNIT (argtype))
> +		  {
> +		    /* Use the larger of the initializer size and the tail
> +		       padding in the enclosing struct.  */
> +		    poly_int64 rsz = tree_to_poly_int64 (refsize);
> +		    rsz -= baseoff;
> +		    if (known_lt (tree_to_poly_int64 (memsize), rsz))
> +		      memsize = wide_int_to_tree (TREE_TYPE (memsize), rsz);
> +		  }
> +
> +		baseoff = 0;
> +	      }
> +	  }
> +    }
>   
>     if (!memsize)
>       {
> @@ -12810,9 +12853,31 @@ component_ref_size (tree ref, special_array_member *sam /* = NULL */)
>   	  memsize = TYPE_SIZE_UNIT (bt);
>   	}
>         else if (DECL_P (base))
> -	/* Use the size of the BASE object (possibly an array of some
> -	   other type such as char used to store the struct).  */
> -	memsize = DECL_SIZE_UNIT (base);
> +	{
> +	  /* It is possible that the flexible array has no constructor at all.
> +	     In such cases, GCC will allocate the array in some weird way.

Weird isn't quite the word.  If there's no initializer there may be tail
padding after the flexible array member.  This is allowed by C and for
declared objects fixed by each ABI.  The padding may be used to store
elements of the flexible array member and so accesses to such elements
should not be diagnosed.  But this is only an exception for flexible
array members, not for any other member arrays.

By the way, I think a similar problem exists in check_mem_ref() for
accesses to flexible array members by pointers, although the fix is
probably going to be different:

   void fp (int i)
   {
     int *q = bx.a.a;
     q[i] = 0;   // missing -Warray-bounds
   }

Longer term, to reduce code duplication, I'd like to replace the logic
in gimple-array-bounds.cc with the pointer_query class which is used
for all the other access warnings.  But that's a bigger project.

Martin

> +	     Assume that the array size is the difference between the start
> +	     address of the array and the next component, if it exists.  */
> +
> +	  tree lcr;
> +
> +	  poly_int64 baseoff2 = 0;
> +	  tree next_field = get_addr_base_and_unit_offset (ref, &baseoff2,
> +						      /* next_elmnt =  */ true);
> +	  if (next_field
> +	      && (lcr = least_common_record (basetype, member, next_field))
> +	      && TREE_CODE (lcr) == RECORD_TYPE
> +	      && known_ge (baseoff2, baseoff))
> +	    {
> +		poly_int64 size =  baseoff2 - baseoff;
> +		memsize = wide_int_to_tree (TREE_TYPE (DECL_SIZE_UNIT (base)),
> +					    size);
> +	    }
> +	  else
> +	    /* Use the size of the BASE object (possibly an array of some
> +	       other type such as char used to store the struct).  */
> +	    memsize = DECL_SIZE_UNIT (base);
> +	}
>         else
>   	return NULL_TREE;
>       }
>
Giuliano Belinassi June 14, 2021, 10:29 p.m. UTC | #4
Hi,

I will give an quick answer to this mail. I will analyze carefully what
richi said when I have more time available.

On Mon, 2021-06-14 at 15:55 -0600, Martin Sebor wrote:
> On 6/13/21 11:00 AM, Giuliano Belinassi wrote:
> > This patch proposes a fix to PR 100944 by improving the array
> > boundary
> 
> Thanks for the patch!
> 
> > computation when the flexible array has no clear constructor: if no
> > constructor were found in the input code, we compute the size of
> > the
> > array as:
> > 
> >    offset(array begin) - offset(next element in RECORD_TYPE)
> > 
> > If no next element is found in the RECORD, simply fall back to:
> > 
> >    offset(array begin) - offset(RECORD_TYPE ends)
> 
> I'm not sure I understand the distinction between these expressions.
> I'd expect them to give the same result: either zero when there's no
> tail padding or negative when there is some.  Is there a case when
> they don't?

You can't compute the offset of a next field if this next field does
not exists. Therefore, if the array has no next member in the base
struct, it is an trailing array and the second logic should apply.

> 
> I would think the fix to need to change just component_ref_size()
> without having to touch other utility functions.  It seems to me
> the problem is in the logic that deals with accesses to backing
> buffers, as in:
> 
>    char a[sizeof (struct Bx)];
>    struct Bx *p = (struct Bx*) a;
>    p->a.a[i] = 1;
> 
> This should also be diagnosed.
> 
> The offset of the next member shouldn't matter, just the number of
> elements in the array's initializer if it has one or the size of
> the struct whose member is being accessed (in addition to the offset
> of the member).

I don't really understeand how this detects the testcase on bugzilla,
as the access in falling into `long x`. I will need to know where the
next member offset is to find such cases.

Giuliano. 

> 
> > 
> > We avoid doing this calculation if the RECORD_TYPE is actually an
> > UNION_TYPE, once things get very complicated in this case.
> > 
> > gcc/ChangeLog:
> > 2021-13-08  Giuliano Belinassi  <giuliano.belinassi@usp.br>
> > 
> >         PR middle-end/100944
> >         * tree-dfa.c (get_ref_base_and_unit_offset): Add option to
> > compute size
> >         of next field.
> >         * (get_ref_base_and_unit_offset_1): Same as above.
> >         * tree-dfa.h (get_ref_base_and_unit_offset): Same as above.
> >         (get_ref_base_and_unit_offset_1): Same as above.
> >         * tree.c (least_common_record_1): New.
> >         (least_common_record): New.
> >         (component_ref_size): Improve array size calculation.
> > 
> > gcc/testsuite/ChangeLog:
> > 2021-13-08  Giuliano Belinassi  <giuliano.belinassi@usp.br>
> > 
> >         PR middle-end/100944
> >         * gcc.dg/Wzero-length-array-bounds.c: Update diagnostic.
> >         * gcc.dg/Warray-bounds-71.c: New test.
> > ---
> >   gcc/testsuite/gcc.dg/Warray-bounds-71.c       |  42 +++++++
> >   .../gcc.dg/Wzero-length-array-bounds.c        |  18 +--
> >   gcc/tree-dfa.c                                |  31 ++++-
> >   gcc/tree-dfa.h                                |   6 +-
> >   gcc/tree.c                                    | 115
> > ++++++++++++++----
> >   5 files changed, 172 insertions(+), 40 deletions(-)
> >   create mode 100644 gcc/testsuite/gcc.dg/Warray-bounds-71.c
> > 
> > diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-71.c
> > b/gcc/testsuite/gcc.dg/Warray-bounds-71.c
> > new file mode 100644
> > index 00000000000..cc5b083bc77
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/Warray-bounds-71.c
> > @@ -0,0 +1,42 @@
> > +/* PR middle-end/100944 - missing -Warray-bounds accessing a
> > flexible array
> > +   member of a nested struct
> > +   { dg-do compile }
> > +   { dg-options "-O2 -Wall" } */
> > +
> > +struct A0
> > +{
> > +  int i, a[0];
> > +};
> > +
> > +struct B0
> > +{
> > +  struct A0 a;
> > +  long x;
> > +} b0;
> > +
> > +void f0 (int i)
> > +{
> > +  long t = b0.x;
> > +  b0.a.a[i] = 0;    // { dg-warning "\\\[-Warray-bounds" }
> > +  if (t != b0.x)    // folded to false
> > +    __builtin_abort ();
> > +}
> > +
> > +struct Ax
> > +{
> > +  int i, a[];
> > +};
> > +
> > +struct Bx
> > +{
> > +  struct Ax a;
> > +  long x;
> > +} bx;
> > +
> > +void fx (int i)
> > +{
> > +  long t = bx.x;
> > +  bx.a.a[i] = 0;    // { dg-warning "\\\[-Warray-bounds" }
> > +  if (t != bx.x)    // folded to false
> > +    __builtin_abort ();
> > +}
> > diff --git a/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c
> > b/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c
> > index 8e880d92dea..117b30ff294 100644
> > --- a/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c
> > +++ b/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c
> > @@ -68,21 +68,21 @@ extern struct Y y;
> >   
> >   void access_to_member (int i)
> >   {
> > -  y.a[0].a[0] = 0;      // { dg-warning "\\\[-Wzero-length-bounds"
> > }
> > -  y.a[0].a[1] = 0;      // { dg-warning "\\\[-Wzero-length-bounds"
> > }
> > -  y.a[0].a[2] = 0;      // { dg-warning "\\\[-Wzero-length-bounds"
> > }
> > +  y.a[0].a[0] = 0;      // { dg-warning "\\\[-Warray-bounds" }
> > +  y.a[0].a[1] = 0;      // { dg-warning "\\\[-Warray-bounds" }
> > +  y.a[0].a[2] = 0;      // { dg-warning "\\\[-Warray-bounds" }
> 
> We do want to keep -Wzero-length-bounds for these accesses (as
> Richard
> already noted).  I think same for those below (whenever there's
> overlap
> between a zero-length array and a subsequent member).
> 
> >     sink (a);
> >   
> > -  y.a[1].a[0] = 0;      // { dg-warning "\\\[-Wzero-length-bounds"
> > }
> > -  y.a[1].a[1] = 0;      // { dg-warning "\\\[-Wzero-length-bounds"
> > }
> > +  y.a[1].a[0] = 0;      // { dg-warning "\\\[-Warray-bounds" }
> > +  y.a[1].a[1] = 0;      // { dg-warning "\\\[-Warray-bounds" }
> >     /* Similar to the array case above, accesses to a subsequent
> > member
> >        of the "parent" struct seem like a more severe problem than
> > those
> >        to the next member of the same struct.  */
> > -  y.a[1].a[2] = 0;      // { dg-warning "\\\[-Wzero-length-bounds"
> > }
> > +  y.a[1].a[2] = 0;      // { dg-warning "\\\[-Warray-bounds" }
> >     sink (a);
> >   
> > -  y.b.a[0] = 0;         // { dg-warning "\\\[-Wzero-length-bounds"
> > }
> > -  y.b.a[1] = 0;         // { dg-warning "\\\[-Wzero-length-bounds"
> > }
> > -  y.b.a[2] = 0;         // { dg-warning "\\\[-Wzero-length-bounds"
> > }
> > +  y.b.a[0] = 0;         // { dg-warning "\\\[-Warray-bounds" }
> > +  y.b.a[1] = 0;         // { dg-warning "\\\[-Warray-bounds" }
> > +  y.b.a[2] = 0;         // { dg-warning "\\\[-Warray-bounds" }
> >     y.b.a[3] = 0;         // { dg-warning "\\\[-Warray-bounds" }
> >   }
> > diff --git a/gcc/tree-dfa.c b/gcc/tree-dfa.c
> > index 1d20de0c400..dc3c15f11d5 100644
> > --- a/gcc/tree-dfa.c
> > +++ b/gcc/tree-dfa.c
> > @@ -772,9 +772,11 @@ get_ref_base_and_extent_hwi (tree exp,
> > HOST_WIDE_INT *poffset,
> >   
> >   tree
> >   get_addr_base_and_unit_offset_1 (tree exp, poly_int64_pod
> > *poffset,
> > -                                tree (*valueize) (tree))
> > +                                tree (*valueize) (tree),
> > +                                bool of_next_component /* =
> > false.  */)
> >   {
> >     poly_int64 byte_offset = 0;
> > +  tree next_field = NULL_TREE;
> >   
> >     /* Compute cumulative byte-offset for nested component-refs and
> > array-refs,
> >        and find the ultimate containing object.  */
> > @@ -797,11 +799,27 @@ get_addr_base_and_unit_offset_1 (tree exp,
> > poly_int64_pod *poffset,
> >         case COMPONENT_REF:
> >           {
> >             tree field = TREE_OPERAND (exp, 1);
> > -           tree this_offset = component_ref_field_offset (exp);
> > +           tree this_offset;
> > +
> >             poly_int64 hthis_offset;
> >   
> > +           if (of_next_component && !next_field)
> > +             {
> > +               /* We are looking for the next component of the
> > record.  */
> > +               next_field = TREE_CHAIN (field);
> > +               if (!next_field)
> > +                 break;
> > +
> > +               /* We found a next component.  Flag that we found
> > it and
> > +                  update the target field.  */
> > +               field = next_field;
> > +             }
> > +
> > +           this_offset = component_ref_field_offset (exp);
> > +
> >             if (!this_offset
> >                 || !poly_int_tree_p (this_offset, &hthis_offset)
> > +               || TREE_CODE (field) != FIELD_DECL
> >                 || (TREE_INT_CST_LOW (DECL_FIELD_BIT_OFFSET
> > (field))
> >                     % BITS_PER_UNIT))
> >               return NULL_TREE;
> > @@ -904,6 +922,9 @@ get_addr_base_and_unit_offset_1 (tree exp,
> > poly_int64_pod *poffset,
> >   done:
> >   
> >     *poffset = byte_offset;
> > +
> > +  if (of_next_component)
> > +    return next_field;
> >     return exp;
> >   }
> >   
> > @@ -913,9 +934,11 @@ done:
> >      is not BITS_PER_UNIT-aligned.  */
> >   
> >   tree
> > -get_addr_base_and_unit_offset (tree exp, poly_int64_pod *poffset)
> > +get_addr_base_and_unit_offset (tree exp, poly_int64_pod *poffset,
> > bool
> > +                              of_next_component /* = false.  */)
> >   {
> > -  return get_addr_base_and_unit_offset_1 (exp, poffset, NULL);
> > +  return get_addr_base_and_unit_offset_1 (exp, poffset, NULL,
> > +                                         of_next_component);
> >   }
> >   
> >   /* Returns true if STMT references an SSA_NAME that has
> > diff --git a/gcc/tree-dfa.h b/gcc/tree-dfa.h
> > index b1457ab065c..94e44d9c3f6 100644
> > --- a/gcc/tree-dfa.h
> > +++ b/gcc/tree-dfa.h
> > @@ -34,8 +34,10 @@ extern tree get_ref_base_and_extent (tree,
> > poly_int64_pod *, poly_int64_pod *,
> >   extern tree get_ref_base_and_extent_hwi (tree, HOST_WIDE_INT *,
> >                                          HOST_WIDE_INT *, bool *);
> >   extern tree get_addr_base_and_unit_offset_1 (tree, poly_int64_pod
> > *,
> > -                                            tree (*) (tree));
> > -extern tree get_addr_base_and_unit_offset (tree, poly_int64_pod
> > *);
> > +                                            tree (*) (tree),
> > +                                            bool of_next_component
> > = false);
> > +extern tree get_addr_base_and_unit_offset (tree, poly_int64_pod *,
> > +                                          bool = false);
> >   extern bool stmt_references_abnormal_ssa_name (gimple *);
> >   extern void replace_abnormal_ssa_names (gimple *);
> >   extern void dump_enumerated_decls (FILE *, dump_flags_t);
> > diff --git a/gcc/tree.c b/gcc/tree.c
> > index 1aa6e557a04..45d7fa2ae92 100644
> > --- a/gcc/tree.c
> > +++ b/gcc/tree.c
> > @@ -12649,6 +12649,47 @@ get_initializer_for (tree init, tree decl)
> >     return NULL_TREE;
> >   }
> >   
> > +static int
> > +least_common_record_1 (tree basetype, tree field1, tree field2,
> > +                      tree *least_basetype)
> > +{
> > +  int ret = 0;
> > +
> > +  for (tree fld = TYPE_FIELDS (basetype); fld; fld = TREE_CHAIN
> > (fld))
> > +    {
> > +      if (fld == field1 || fld == field2)
> > +       ret++;
> > +
> > +      if (TREE_CODE (TREE_TYPE (fld)) == UNION_TYPE
> > +         || TREE_CODE (TREE_TYPE (fld)) == RECORD_TYPE)
> > +       ret += least_common_record_1 (TREE_TYPE (fld), field1,
> > field2,
> > +                                     least_basetype);
> > +    }
> > +
> > +  if (ret == 2)
> > +    {
> > +      *least_basetype = basetype;
> > +      ret++; /* Avoid getting in this block again if a common
> > basetype were
> > +               found.  */
> > +    }
> > +
> > +  return ret;
> > +}
> > +
> > +/* Find the least common RECORD type common to two FIELDS from
> > base.  */
> > +static tree
> > +least_common_record (tree basetype, tree field1, tree field2)
> > +{
> > +  if (!(TREE_CODE (basetype) == RECORD_TYPE
> > +       || TREE_CODE (basetype) == UNION_TYPE))
> > +    return NULL_TREE;
> > +
> > +  tree least_basetype = NULL_TREE;
> > +  least_common_record_1 (basetype, field1, field2,
> > &least_basetype);
> > +
> > +  return least_basetype;
> > +}
> > +
> >   /* Determines the size of the member referenced by the
> > COMPONENT_REF
> >      REF, using its initializer expression if necessary in order to
> >      determine the size of an initialized flexible array member.
> > @@ -12768,28 +12809,30 @@ component_ref_size (tree ref,
> > special_array_member *sam /* = NULL */)
> >     memsize = NULL_TREE;
> >   
> >     if (typematch)
> > -    /* MEMBER is a true flexible array member.  Compute its size
> > from
> > -       the initializer of the BASE object if it has one.  */
> > -    if (tree init = DECL_P (base) ? DECL_INITIAL (base) :
> > NULL_TREE)
> > -      if (init != error_mark_node)
> > -       {
> > -         init = get_initializer_for (init, member);
> > -         if (init)
> > -           {
> > -             memsize = TYPE_SIZE_UNIT (TREE_TYPE (init));
> > -             if (tree refsize = TYPE_SIZE_UNIT (argtype))
> > -               {
> > -                 /* Use the larger of the initializer size and the
> > tail
> > -                    padding in the enclosing struct.  */
> > -                 poly_int64 rsz = tree_to_poly_int64 (refsize);
> > -                 rsz -= baseoff;
> > -                 if (known_lt (tree_to_poly_int64 (memsize), rsz))
> > -                   memsize = wide_int_to_tree (TREE_TYPE
> > (memsize), rsz);
> > -               }
> > -
> > -             baseoff = 0;
> > -           }
> > -       }
> > +    {
> > +      /* MEMBER is a true flexible array member.  Compute its size
> > from
> > +        the initializer of the BASE object if it has one.  */
> > +      if (tree init = DECL_P (base) ? DECL_INITIAL (base) :
> > NULL_TREE)
> > +       if (init != error_mark_node)
> > +         {
> > +           init = get_initializer_for (init, member);
> > +           if (init)
> > +             {
> > +               memsize = TYPE_SIZE_UNIT (TREE_TYPE (init));
> > +               if (tree refsize = TYPE_SIZE_UNIT (argtype))
> > +                 {
> > +                   /* Use the larger of the initializer size and
> > the tail
> > +                      padding in the enclosing struct.  */
> > +                   poly_int64 rsz = tree_to_poly_int64 (refsize);
> > +                   rsz -= baseoff;
> > +                   if (known_lt (tree_to_poly_int64 (memsize),
> > rsz))
> > +                     memsize = wide_int_to_tree (TREE_TYPE
> > (memsize), rsz);
> > +                 }
> > +
> > +               baseoff = 0;
> > +             }
> > +         }
> > +    }
> >   
> >     if (!memsize)
> >       {
> > @@ -12810,9 +12853,31 @@ component_ref_size (tree ref,
> > special_array_member *sam /* = NULL */)
> >           memsize = TYPE_SIZE_UNIT (bt);
> >         }
> >         else if (DECL_P (base))
> > -       /* Use the size of the BASE object (possibly an array of
> > some
> > -          other type such as char used to store the struct).  */
> > -       memsize = DECL_SIZE_UNIT (base);
> > +       {
> > +         /* It is possible that the flexible array has no
> > constructor at all.
> > +            In such cases, GCC will allocate the array in some
> > weird way.
> 
> Weird isn't quite the word.  If there's no initializer there may be
> tail
> padding after the flexible array member.  This is allowed by C and
> for
> declared objects fixed by each ABI.  The padding may be used to store
> elements of the flexible array member and so accesses to such
> elements
> should not be diagnosed.  But this is only an exception for flexible
> array members, not for any other member arrays.
> 
> By the way, I think a similar problem exists in check_mem_ref() for
> accesses to flexible array members by pointers, although the fix is
> probably going to be different:
> 
>    void fp (int i)
>    {
>      int *q = bx.a.a;
>      q[i] = 0;   // missing -Warray-bounds
>    }
> 
> Longer term, to reduce code duplication, I'd like to replace the
> logic
> in gimple-array-bounds.cc with the pointer_query class which is used
> for all the other access warnings.  But that's a bigger project.
> 
> Martin
> 
> > +            Assume that the array size is the difference between
> > the start
> > +            address of the array and the next component, if it
> > exists.  */
> > +
> > +         tree lcr;
> > +
> > +         poly_int64 baseoff2 = 0;
> > +         tree next_field = get_addr_base_and_unit_offset (ref,
> > &baseoff2,
> > +                                                     /* next_elmnt
> > =  */ true);
> > +         if (next_field
> > +             && (lcr = least_common_record (basetype, member,
> > next_field))
> > +             && TREE_CODE (lcr) == RECORD_TYPE
> > +             && known_ge (baseoff2, baseoff))
> > +           {
> > +               poly_int64 size =  baseoff2 - baseoff;
> > +               memsize = wide_int_to_tree (TREE_TYPE
> > (DECL_SIZE_UNIT (base)),
> > +                                           size);
> > +           }
> > +         else
> > +           /* Use the size of the BASE object (possibly an array
> > of some
> > +              other type such as char used to store the struct). 
> > */
> > +           memsize = DECL_SIZE_UNIT (base);
> > +       }
> >         else
> >         return NULL_TREE;
> >       }
> > 
>
Martin Sebor June 14, 2021, 11:48 p.m. UTC | #5
On 6/14/21 4:29 PM, Giuliano Belinassi wrote:
> Hi,
> 
> I will give an quick answer to this mail. I will analyze carefully what
> richi said when I have more time available.
> 
> On Mon, 2021-06-14 at 15:55 -0600, Martin Sebor wrote:
>> On 6/13/21 11:00 AM, Giuliano Belinassi wrote:
>>> This patch proposes a fix to PR 100944 by improving the array
>>> boundary
>>
>> Thanks for the patch!
>>
>>> computation when the flexible array has no clear constructor: if no
>>> constructor were found in the input code, we compute the size of
>>> the
>>> array as:
>>>
>>>     offset(array begin) - offset(next element in RECORD_TYPE)
>>>
>>> If no next element is found in the RECORD, simply fall back to:
>>>
>>>     offset(array begin) - offset(RECORD_TYPE ends)
>>
>> I'm not sure I understand the distinction between these expressions.
>> I'd expect them to give the same result: either zero when there's no
>> tail padding or negative when there is some.  Is there a case when
>> they don't?
> 
> You can't compute the offset of a next field if this next field does
> not exists. Therefore, if the array has no next member in the base
> struct, it is an trailing array and the second logic should apply.

I see.  The amount of tail padding in the nested struct depends on
the alignment of the subsequent member in the outer struct.  So for
example, in this code:

   struct A { char n, a[]; };
   struct B { struct A a; __INT64_TYPE__ x; } b;

   void f (int i)
   {
     b.a.a[7] = 7;    // no waring here
     b.a.a[8] = 8;    // -Warray-bounds here
     b.a.a[i] = i;    // no warning
   }

we want to warn on the second access but not on the other two.

I guess that makes sense since nested flexible arrays, while invalid
C, are accepted as a GCC extension.  The patch needs to then add test
cases to verify that (with different amounts of tail padding to make
sure the computation is correct).

> 
>>
>> I would think the fix to need to change just component_ref_size()
>> without having to touch other utility functions.  It seems to me
>> the problem is in the logic that deals with accesses to backing
>> buffers, as in:
>>
>>     char a[sizeof (struct Bx)];
>>     struct Bx *p = (struct Bx*) a;
>>     p->a.a[i] = 1;
>>
>> This should also be diagnosed.
>>
>> The offset of the next member shouldn't matter, just the number of
>> elements in the array's initializer if it has one or the size of
>> the struct whose member is being accessed (in addition to the offset
>> of the member).
> 
> I don't really understeand how this detects the testcase on bugzilla,
> as the access in falling into `long x`. I will need to know where the
> next member offset is to find such cases.

What I mean is that the problem in both cases is the same and so is
very likely the root cause of it not being diagnosed, so both should
be fixed.  (In the above there happens to be no tail padding so
the access is invalid regardless of the value of i, but when there
is a nozero amount of tail padding the warning should obviously only
trigger when the access is out of bounds).

Martin

> 
> Giuliano.
> 
>>
>>>
>>> We avoid doing this calculation if the RECORD_TYPE is actually an
>>> UNION_TYPE, once things get very complicated in this case.
>>>
>>> gcc/ChangeLog:
>>> 2021-13-08  Giuliano Belinassi  <giuliano.belinassi@usp.br>
>>>
>>>          PR middle-end/100944
>>>          * tree-dfa.c (get_ref_base_and_unit_offset): Add option to
>>> compute size
>>>          of next field.
>>>          * (get_ref_base_and_unit_offset_1): Same as above.
>>>          * tree-dfa.h (get_ref_base_and_unit_offset): Same as above.
>>>          (get_ref_base_and_unit_offset_1): Same as above.
>>>          * tree.c (least_common_record_1): New.
>>>          (least_common_record): New.
>>>          (component_ref_size): Improve array size calculation.
>>>
>>> gcc/testsuite/ChangeLog:
>>> 2021-13-08  Giuliano Belinassi  <giuliano.belinassi@usp.br>
>>>
>>>          PR middle-end/100944
>>>          * gcc.dg/Wzero-length-array-bounds.c: Update diagnostic.
>>>          * gcc.dg/Warray-bounds-71.c: New test.
>>> ---
>>>    gcc/testsuite/gcc.dg/Warray-bounds-71.c       |  42 +++++++
>>>    .../gcc.dg/Wzero-length-array-bounds.c        |  18 +--
>>>    gcc/tree-dfa.c                                |  31 ++++-
>>>    gcc/tree-dfa.h                                |   6 +-
>>>    gcc/tree.c                                    | 115
>>> ++++++++++++++----
>>>    5 files changed, 172 insertions(+), 40 deletions(-)
>>>    create mode 100644 gcc/testsuite/gcc.dg/Warray-bounds-71.c
>>>
>>> diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-71.c
>>> b/gcc/testsuite/gcc.dg/Warray-bounds-71.c
>>> new file mode 100644
>>> index 00000000000..cc5b083bc77
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.dg/Warray-bounds-71.c
>>> @@ -0,0 +1,42 @@
>>> +/* PR middle-end/100944 - missing -Warray-bounds accessing a
>>> flexible array
>>> +   member of a nested struct
>>> +   { dg-do compile }
>>> +   { dg-options "-O2 -Wall" } */
>>> +
>>> +struct A0
>>> +{
>>> +  int i, a[0];
>>> +};
>>> +
>>> +struct B0
>>> +{
>>> +  struct A0 a;
>>> +  long x;
>>> +} b0;
>>> +
>>> +void f0 (int i)
>>> +{
>>> +  long t = b0.x;
>>> +  b0.a.a[i] = 0;    // { dg-warning "\\\[-Warray-bounds" }
>>> +  if (t != b0.x)    // folded to false
>>> +    __builtin_abort ();
>>> +}
>>> +
>>> +struct Ax
>>> +{
>>> +  int i, a[];
>>> +};
>>> +
>>> +struct Bx
>>> +{
>>> +  struct Ax a;
>>> +  long x;
>>> +} bx;
>>> +
>>> +void fx (int i)
>>> +{
>>> +  long t = bx.x;
>>> +  bx.a.a[i] = 0;    // { dg-warning "\\\[-Warray-bounds" }
>>> +  if (t != bx.x)    // folded to false
>>> +    __builtin_abort ();
>>> +}
>>> diff --git a/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c
>>> b/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c
>>> index 8e880d92dea..117b30ff294 100644
>>> --- a/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c
>>> +++ b/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c
>>> @@ -68,21 +68,21 @@ extern struct Y y;
>>>    
>>>    void access_to_member (int i)
>>>    {
>>> -  y.a[0].a[0] = 0;      // { dg-warning "\\\[-Wzero-length-bounds"
>>> }
>>> -  y.a[0].a[1] = 0;      // { dg-warning "\\\[-Wzero-length-bounds"
>>> }
>>> -  y.a[0].a[2] = 0;      // { dg-warning "\\\[-Wzero-length-bounds"
>>> }
>>> +  y.a[0].a[0] = 0;      // { dg-warning "\\\[-Warray-bounds" }
>>> +  y.a[0].a[1] = 0;      // { dg-warning "\\\[-Warray-bounds" }
>>> +  y.a[0].a[2] = 0;      // { dg-warning "\\\[-Warray-bounds" }
>>
>> We do want to keep -Wzero-length-bounds for these accesses (as
>> Richard
>> already noted).  I think same for those below (whenever there's
>> overlap
>> between a zero-length array and a subsequent member).
>>
>>>      sink (a);
>>>    
>>> -  y.a[1].a[0] = 0;      // { dg-warning "\\\[-Wzero-length-bounds"
>>> }
>>> -  y.a[1].a[1] = 0;      // { dg-warning "\\\[-Wzero-length-bounds"
>>> }
>>> +  y.a[1].a[0] = 0;      // { dg-warning "\\\[-Warray-bounds" }
>>> +  y.a[1].a[1] = 0;      // { dg-warning "\\\[-Warray-bounds" }
>>>      /* Similar to the array case above, accesses to a subsequent
>>> member
>>>         of the "parent" struct seem like a more severe problem than
>>> those
>>>         to the next member of the same struct.  */
>>> -  y.a[1].a[2] = 0;      // { dg-warning "\\\[-Wzero-length-bounds"
>>> }
>>> +  y.a[1].a[2] = 0;      // { dg-warning "\\\[-Warray-bounds" }
>>>      sink (a);
>>>    
>>> -  y.b.a[0] = 0;         // { dg-warning "\\\[-Wzero-length-bounds"
>>> }
>>> -  y.b.a[1] = 0;         // { dg-warning "\\\[-Wzero-length-bounds"
>>> }
>>> -  y.b.a[2] = 0;         // { dg-warning "\\\[-Wzero-length-bounds"
>>> }
>>> +  y.b.a[0] = 0;         // { dg-warning "\\\[-Warray-bounds" }
>>> +  y.b.a[1] = 0;         // { dg-warning "\\\[-Warray-bounds" }
>>> +  y.b.a[2] = 0;         // { dg-warning "\\\[-Warray-bounds" }
>>>      y.b.a[3] = 0;         // { dg-warning "\\\[-Warray-bounds" }
>>>    }
>>> diff --git a/gcc/tree-dfa.c b/gcc/tree-dfa.c
>>> index 1d20de0c400..dc3c15f11d5 100644
>>> --- a/gcc/tree-dfa.c
>>> +++ b/gcc/tree-dfa.c
>>> @@ -772,9 +772,11 @@ get_ref_base_and_extent_hwi (tree exp,
>>> HOST_WIDE_INT *poffset,
>>>    
>>>    tree
>>>    get_addr_base_and_unit_offset_1 (tree exp, poly_int64_pod
>>> *poffset,
>>> -                                tree (*valueize) (tree))
>>> +                                tree (*valueize) (tree),
>>> +                                bool of_next_component /* =
>>> false.  */)
>>>    {
>>>      poly_int64 byte_offset = 0;
>>> +  tree next_field = NULL_TREE;
>>>    
>>>      /* Compute cumulative byte-offset for nested component-refs and
>>> array-refs,
>>>         and find the ultimate containing object.  */
>>> @@ -797,11 +799,27 @@ get_addr_base_and_unit_offset_1 (tree exp,
>>> poly_int64_pod *poffset,
>>>          case COMPONENT_REF:
>>>            {
>>>              tree field = TREE_OPERAND (exp, 1);
>>> -           tree this_offset = component_ref_field_offset (exp);
>>> +           tree this_offset;
>>> +
>>>              poly_int64 hthis_offset;
>>>    
>>> +           if (of_next_component && !next_field)
>>> +             {
>>> +               /* We are looking for the next component of the
>>> record.  */
>>> +               next_field = TREE_CHAIN (field);
>>> +               if (!next_field)
>>> +                 break;
>>> +
>>> +               /* We found a next component.  Flag that we found
>>> it and
>>> +                  update the target field.  */
>>> +               field = next_field;
>>> +             }
>>> +
>>> +           this_offset = component_ref_field_offset (exp);
>>> +
>>>              if (!this_offset
>>>                  || !poly_int_tree_p (this_offset, &hthis_offset)
>>> +               || TREE_CODE (field) != FIELD_DECL
>>>                  || (TREE_INT_CST_LOW (DECL_FIELD_BIT_OFFSET
>>> (field))
>>>                      % BITS_PER_UNIT))
>>>                return NULL_TREE;
>>> @@ -904,6 +922,9 @@ get_addr_base_and_unit_offset_1 (tree exp,
>>> poly_int64_pod *poffset,
>>>    done:
>>>    
>>>      *poffset = byte_offset;
>>> +
>>> +  if (of_next_component)
>>> +    return next_field;
>>>      return exp;
>>>    }
>>>    
>>> @@ -913,9 +934,11 @@ done:
>>>       is not BITS_PER_UNIT-aligned.  */
>>>    
>>>    tree
>>> -get_addr_base_and_unit_offset (tree exp, poly_int64_pod *poffset)
>>> +get_addr_base_and_unit_offset (tree exp, poly_int64_pod *poffset,
>>> bool
>>> +                              of_next_component /* = false.  */)
>>>    {
>>> -  return get_addr_base_and_unit_offset_1 (exp, poffset, NULL);
>>> +  return get_addr_base_and_unit_offset_1 (exp, poffset, NULL,
>>> +                                         of_next_component);
>>>    }
>>>    
>>>    /* Returns true if STMT references an SSA_NAME that has
>>> diff --git a/gcc/tree-dfa.h b/gcc/tree-dfa.h
>>> index b1457ab065c..94e44d9c3f6 100644
>>> --- a/gcc/tree-dfa.h
>>> +++ b/gcc/tree-dfa.h
>>> @@ -34,8 +34,10 @@ extern tree get_ref_base_and_extent (tree,
>>> poly_int64_pod *, poly_int64_pod *,
>>>    extern tree get_ref_base_and_extent_hwi (tree, HOST_WIDE_INT *,
>>>                                           HOST_WIDE_INT *, bool *);
>>>    extern tree get_addr_base_and_unit_offset_1 (tree, poly_int64_pod
>>> *,
>>> -                                            tree (*) (tree));
>>> -extern tree get_addr_base_and_unit_offset (tree, poly_int64_pod
>>> *);
>>> +                                            tree (*) (tree),
>>> +                                            bool of_next_component
>>> = false);
>>> +extern tree get_addr_base_and_unit_offset (tree, poly_int64_pod *,
>>> +                                          bool = false);
>>>    extern bool stmt_references_abnormal_ssa_name (gimple *);
>>>    extern void replace_abnormal_ssa_names (gimple *);
>>>    extern void dump_enumerated_decls (FILE *, dump_flags_t);
>>> diff --git a/gcc/tree.c b/gcc/tree.c
>>> index 1aa6e557a04..45d7fa2ae92 100644
>>> --- a/gcc/tree.c
>>> +++ b/gcc/tree.c
>>> @@ -12649,6 +12649,47 @@ get_initializer_for (tree init, tree decl)
>>>      return NULL_TREE;
>>>    }
>>>    
>>> +static int
>>> +least_common_record_1 (tree basetype, tree field1, tree field2,
>>> +                      tree *least_basetype)
>>> +{
>>> +  int ret = 0;
>>> +
>>> +  for (tree fld = TYPE_FIELDS (basetype); fld; fld = TREE_CHAIN
>>> (fld))
>>> +    {
>>> +      if (fld == field1 || fld == field2)
>>> +       ret++;
>>> +
>>> +      if (TREE_CODE (TREE_TYPE (fld)) == UNION_TYPE
>>> +         || TREE_CODE (TREE_TYPE (fld)) == RECORD_TYPE)
>>> +       ret += least_common_record_1 (TREE_TYPE (fld), field1,
>>> field2,
>>> +                                     least_basetype);
>>> +    }
>>> +
>>> +  if (ret == 2)
>>> +    {
>>> +      *least_basetype = basetype;
>>> +      ret++; /* Avoid getting in this block again if a common
>>> basetype were
>>> +               found.  */
>>> +    }
>>> +
>>> +  return ret;
>>> +}
>>> +
>>> +/* Find the least common RECORD type common to two FIELDS from
>>> base.  */
>>> +static tree
>>> +least_common_record (tree basetype, tree field1, tree field2)
>>> +{
>>> +  if (!(TREE_CODE (basetype) == RECORD_TYPE
>>> +       || TREE_CODE (basetype) == UNION_TYPE))
>>> +    return NULL_TREE;
>>> +
>>> +  tree least_basetype = NULL_TREE;
>>> +  least_common_record_1 (basetype, field1, field2,
>>> &least_basetype);
>>> +
>>> +  return least_basetype;
>>> +}
>>> +
>>>    /* Determines the size of the member referenced by the
>>> COMPONENT_REF
>>>       REF, using its initializer expression if necessary in order to
>>>       determine the size of an initialized flexible array member.
>>> @@ -12768,28 +12809,30 @@ component_ref_size (tree ref,
>>> special_array_member *sam /* = NULL */)
>>>      memsize = NULL_TREE;
>>>    
>>>      if (typematch)
>>> -    /* MEMBER is a true flexible array member.  Compute its size
>>> from
>>> -       the initializer of the BASE object if it has one.  */
>>> -    if (tree init = DECL_P (base) ? DECL_INITIAL (base) :
>>> NULL_TREE)
>>> -      if (init != error_mark_node)
>>> -       {
>>> -         init = get_initializer_for (init, member);
>>> -         if (init)
>>> -           {
>>> -             memsize = TYPE_SIZE_UNIT (TREE_TYPE (init));
>>> -             if (tree refsize = TYPE_SIZE_UNIT (argtype))
>>> -               {
>>> -                 /* Use the larger of the initializer size and the
>>> tail
>>> -                    padding in the enclosing struct.  */
>>> -                 poly_int64 rsz = tree_to_poly_int64 (refsize);
>>> -                 rsz -= baseoff;
>>> -                 if (known_lt (tree_to_poly_int64 (memsize), rsz))
>>> -                   memsize = wide_int_to_tree (TREE_TYPE
>>> (memsize), rsz);
>>> -               }
>>> -
>>> -             baseoff = 0;
>>> -           }
>>> -       }
>>> +    {
>>> +      /* MEMBER is a true flexible array member.  Compute its size
>>> from
>>> +        the initializer of the BASE object if it has one.  */
>>> +      if (tree init = DECL_P (base) ? DECL_INITIAL (base) :
>>> NULL_TREE)
>>> +       if (init != error_mark_node)
>>> +         {
>>> +           init = get_initializer_for (init, member);
>>> +           if (init)
>>> +             {
>>> +               memsize = TYPE_SIZE_UNIT (TREE_TYPE (init));
>>> +               if (tree refsize = TYPE_SIZE_UNIT (argtype))
>>> +                 {
>>> +                   /* Use the larger of the initializer size and
>>> the tail
>>> +                      padding in the enclosing struct.  */
>>> +                   poly_int64 rsz = tree_to_poly_int64 (refsize);
>>> +                   rsz -= baseoff;
>>> +                   if (known_lt (tree_to_poly_int64 (memsize),
>>> rsz))
>>> +                     memsize = wide_int_to_tree (TREE_TYPE
>>> (memsize), rsz);
>>> +                 }
>>> +
>>> +               baseoff = 0;
>>> +             }
>>> +         }
>>> +    }
>>>    
>>>      if (!memsize)
>>>        {
>>> @@ -12810,9 +12853,31 @@ component_ref_size (tree ref,
>>> special_array_member *sam /* = NULL */)
>>>            memsize = TYPE_SIZE_UNIT (bt);
>>>          }
>>>          else if (DECL_P (base))
>>> -       /* Use the size of the BASE object (possibly an array of
>>> some
>>> -          other type such as char used to store the struct).  */
>>> -       memsize = DECL_SIZE_UNIT (base);
>>> +       {
>>> +         /* It is possible that the flexible array has no
>>> constructor at all.
>>> +            In such cases, GCC will allocate the array in some
>>> weird way.
>>
>> Weird isn't quite the word.  If there's no initializer there may be
>> tail
>> padding after the flexible array member.  This is allowed by C and
>> for
>> declared objects fixed by each ABI.  The padding may be used to store
>> elements of the flexible array member and so accesses to such
>> elements
>> should not be diagnosed.  But this is only an exception for flexible
>> array members, not for any other member arrays.
>>
>> By the way, I think a similar problem exists in check_mem_ref() for
>> accesses to flexible array members by pointers, although the fix is
>> probably going to be different:
>>
>>     void fp (int i)
>>     {
>>       int *q = bx.a.a;
>>       q[i] = 0;   // missing -Warray-bounds
>>     }
>>
>> Longer term, to reduce code duplication, I'd like to replace the
>> logic
>> in gimple-array-bounds.cc with the pointer_query class which is used
>> for all the other access warnings.  But that's a bigger project.
>>
>> Martin
>>
>>> +            Assume that the array size is the difference between
>>> the start
>>> +            address of the array and the next component, if it
>>> exists.  */
>>> +
>>> +         tree lcr;
>>> +
>>> +         poly_int64 baseoff2 = 0;
>>> +         tree next_field = get_addr_base_and_unit_offset (ref,
>>> &baseoff2,
>>> +                                                     /* next_elmnt
>>> =  */ true);
>>> +         if (next_field
>>> +             && (lcr = least_common_record (basetype, member,
>>> next_field))
>>> +             && TREE_CODE (lcr) == RECORD_TYPE
>>> +             && known_ge (baseoff2, baseoff))
>>> +           {
>>> +               poly_int64 size =  baseoff2 - baseoff;
>>> +               memsize = wide_int_to_tree (TREE_TYPE
>>> (DECL_SIZE_UNIT (base)),
>>> +                                           size);
>>> +           }
>>> +         else
>>> +           /* Use the size of the BASE object (possibly an array
>>> of some
>>> +              other type such as char used to store the struct).
>>> */
>>> +           memsize = DECL_SIZE_UNIT (base);
>>> +       }
>>>          else
>>>          return NULL_TREE;
>>>        }
>>>
>>
> 
>
Richard Biener June 15, 2021, 6:23 a.m. UTC | #6
On Mon, Jun 14, 2021 at 7:32 PM Martin Sebor <msebor@gmail.com> wrote:
>
> On 6/14/21 12:36 AM, Richard Biener via Gcc-patches wrote:
> > On Sun, Jun 13, 2021 at 7:00 PM Giuliano Belinassi via Gcc-patches
> > <gcc-patches@gcc.gnu.org> wrote:
> >>
> >> This patch proposes a fix to PR 100944 by improving the array boundary
> >> computation when the flexible array has no clear constructor: if no
> >> constructor were found in the input code, we compute the size of the
> >> array as:
> >>
> >>    offset(array begin) - offset(next element in RECORD_TYPE)
> >>
> >> If no next element is found in the RECORD, simply fall back to:
> >>
> >>    offset(array begin) - offset(RECORD_TYPE ends)
> >>
> >> We avoid doing this calculation if the RECORD_TYPE is actually an
> >> UNION_TYPE, once things get very complicated in this case.
> >
> > I'm looking at Wzero-length-array-bounds.c and wonder why the patch
> > makes a difference here.
> >
> > struct X { int a[0]; int b, c; };
> >
> > a is clearly not a flex-array?
>
> The warning issued here shouldn't change.  -Wzero-length-bounds is
> documented to
>
>    Warn about Accesses to elements of zero-length array members that
>    might overlap other members of the same object.
>
> By "the object" the text means the complete object (if one can be
> determined), not necessarily just the subobject of the enclosing
> struct.  This is contrast to accesses to zero-length arrays that
> are out of the bounds of the complete object.  This distinction
> was made to help the kernel transition to the new warning (I'm
> not sure if they're done with the cleanup yet).
>
> >
> > The code in component_ref_size is truly weird.  IMHO using
> > get_ref_base_and_unit_offset for computing the size of a flexarray
> > member is the wrong utility - instead a much better choice
> > (also for consistency) would be array_at_struct_end_p itself,
> > for example by giving it an optional HOST_WIDE_INT *size
> > argument.
>
> array_at_struct_end_p() is permissive and treats all trailing arrays
> as effectively unbounded if it doesn't know the enclosing object.
> -Warray-bounds and all other access warnings are more conservative
> and treat only flexible array members and trailing arrays of zero
> length as unbounded (except in complete objects where they consider
> the initializer for the former).

Sure, but that distinction is easy to fix for the consumer by looking at
the array field in case array_at_struct_end_p returns true.  What I'm
saying is that array_at_struct_end_p already performs analysis
whether the array is the last field in the object and it already performs
analysis of the object size - meaning it computes the possible extent
of the array (and it matches that up with the array declaration).  It could
simply communicate the length [bound] to the caller.

And if looking at an initializer is really necessary (IMHO that smells like
the FE issue that fails to adjust DECL_SIZE of the object from it?) then
even array_at_struct_end_p could be improved by looking at it by
passing the (optional) initializer to the function.

Richard.

> Martin
>
> >
> > Richard.
> >
> >> gcc/ChangeLog:
> >> 2021-13-08  Giuliano Belinassi  <giuliano.belinassi@usp.br>
> >>
> >>          PR middle-end/100944
> >>          * tree-dfa.c (get_ref_base_and_unit_offset): Add option to compute size
> >>          of next field.
> >>          * (get_ref_base_and_unit_offset_1): Same as above.
> >>          * tree-dfa.h (get_ref_base_and_unit_offset): Same as above.
> >>          (get_ref_base_and_unit_offset_1): Same as above.
> >>          * tree.c (least_common_record_1): New.
> >>          (least_common_record): New.
> >>          (component_ref_size): Improve array size calculation.
> >>
> >> gcc/testsuite/ChangeLog:
> >> 2021-13-08  Giuliano Belinassi  <giuliano.belinassi@usp.br>
> >>
> >>          PR middle-end/100944
> >>          * gcc.dg/Wzero-length-array-bounds.c: Update diagnostic.
> >>          * gcc.dg/Warray-bounds-71.c: New test.
> >> ---
> >>   gcc/testsuite/gcc.dg/Warray-bounds-71.c       |  42 +++++++
> >>   .../gcc.dg/Wzero-length-array-bounds.c        |  18 +--
> >>   gcc/tree-dfa.c                                |  31 ++++-
> >>   gcc/tree-dfa.h                                |   6 +-
> >>   gcc/tree.c                                    | 115 ++++++++++++++----
> >>   5 files changed, 172 insertions(+), 40 deletions(-)
> >>   create mode 100644 gcc/testsuite/gcc.dg/Warray-bounds-71.c
> >>
> >> diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-71.c b/gcc/testsuite/gcc.dg/Warray-bounds-71.c
> >> new file mode 100644
> >> index 00000000000..cc5b083bc77
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.dg/Warray-bounds-71.c
> >> @@ -0,0 +1,42 @@
> >> +/* PR middle-end/100944 - missing -Warray-bounds accessing a flexible array
> >> +   member of a nested struct
> >> +   { dg-do compile }
> >> +   { dg-options "-O2 -Wall" } */
> >> +
> >> +struct A0
> >> +{
> >> +  int i, a[0];
> >> +};
> >> +
> >> +struct B0
> >> +{
> >> +  struct A0 a;
> >> +  long x;
> >> +} b0;
> >> +
> >> +void f0 (int i)
> >> +{
> >> +  long t = b0.x;
> >> +  b0.a.a[i] = 0;    // { dg-warning "\\\[-Warray-bounds" }
> >> +  if (t != b0.x)    // folded to false
> >> +    __builtin_abort ();
> >> +}
> >> +
> >> +struct Ax
> >> +{
> >> +  int i, a[];
> >> +};
> >> +
> >> +struct Bx
> >> +{
> >> +  struct Ax a;
> >> +  long x;
> >> +} bx;
> >> +
> >> +void fx (int i)
> >> +{
> >> +  long t = bx.x;
> >> +  bx.a.a[i] = 0;    // { dg-warning "\\\[-Warray-bounds" }
> >> +  if (t != bx.x)    // folded to false
> >> +    __builtin_abort ();
> >> +}
> >> diff --git a/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c b/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c
> >> index 8e880d92dea..117b30ff294 100644
> >> --- a/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c
> >> +++ b/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c
> >> @@ -68,21 +68,21 @@ extern struct Y y;
> >>
> >>   void access_to_member (int i)
> >>   {
> >> -  y.a[0].a[0] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
> >> -  y.a[0].a[1] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
> >> -  y.a[0].a[2] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
> >> +  y.a[0].a[0] = 0;      // { dg-warning "\\\[-Warray-bounds" }
> >> +  y.a[0].a[1] = 0;      // { dg-warning "\\\[-Warray-bounds" }
> >> +  y.a[0].a[2] = 0;      // { dg-warning "\\\[-Warray-bounds" }
> >>     sink (a);
> >>
> >> -  y.a[1].a[0] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
> >> -  y.a[1].a[1] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
> >> +  y.a[1].a[0] = 0;      // { dg-warning "\\\[-Warray-bounds" }
> >> +  y.a[1].a[1] = 0;      // { dg-warning "\\\[-Warray-bounds" }
> >>     /* Similar to the array case above, accesses to a subsequent member
> >>        of the "parent" struct seem like a more severe problem than those
> >>        to the next member of the same struct.  */
> >> -  y.a[1].a[2] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
> >> +  y.a[1].a[2] = 0;      // { dg-warning "\\\[-Warray-bounds" }
> >>     sink (a);
> >>
> >> -  y.b.a[0] = 0;         // { dg-warning "\\\[-Wzero-length-bounds" }
> >> -  y.b.a[1] = 0;         // { dg-warning "\\\[-Wzero-length-bounds" }
> >> -  y.b.a[2] = 0;         // { dg-warning "\\\[-Wzero-length-bounds" }
> >> +  y.b.a[0] = 0;         // { dg-warning "\\\[-Warray-bounds" }
> >> +  y.b.a[1] = 0;         // { dg-warning "\\\[-Warray-bounds" }
> >> +  y.b.a[2] = 0;         // { dg-warning "\\\[-Warray-bounds" }
> >>     y.b.a[3] = 0;         // { dg-warning "\\\[-Warray-bounds" }
> >>   }
> >> diff --git a/gcc/tree-dfa.c b/gcc/tree-dfa.c
> >> index 1d20de0c400..dc3c15f11d5 100644
> >> --- a/gcc/tree-dfa.c
> >> +++ b/gcc/tree-dfa.c
> >> @@ -772,9 +772,11 @@ get_ref_base_and_extent_hwi (tree exp, HOST_WIDE_INT *poffset,
> >>
> >>   tree
> >>   get_addr_base_and_unit_offset_1 (tree exp, poly_int64_pod *poffset,
> >> -                                tree (*valueize) (tree))
> >> +                                tree (*valueize) (tree),
> >> +                                bool of_next_component /* = false.  */)
> >>   {
> >>     poly_int64 byte_offset = 0;
> >> +  tree next_field = NULL_TREE;
> >>
> >>     /* Compute cumulative byte-offset for nested component-refs and array-refs,
> >>        and find the ultimate containing object.  */
> >> @@ -797,11 +799,27 @@ get_addr_base_and_unit_offset_1 (tree exp, poly_int64_pod *poffset,
> >>          case COMPONENT_REF:
> >>            {
> >>              tree field = TREE_OPERAND (exp, 1);
> >> -           tree this_offset = component_ref_field_offset (exp);
> >> +           tree this_offset;
> >> +
> >>              poly_int64 hthis_offset;
> >>
> >> +           if (of_next_component && !next_field)
> >> +             {
> >> +               /* We are looking for the next component of the record.  */
> >> +               next_field = TREE_CHAIN (field);
> >> +               if (!next_field)
> >> +                 break;
> >> +
> >> +               /* We found a next component.  Flag that we found it and
> >> +                  update the target field.  */
> >> +               field = next_field;
> >> +             }
> >> +
> >> +           this_offset = component_ref_field_offset (exp);
> >> +
> >>              if (!this_offset
> >>                  || !poly_int_tree_p (this_offset, &hthis_offset)
> >> +               || TREE_CODE (field) != FIELD_DECL
> >>                  || (TREE_INT_CST_LOW (DECL_FIELD_BIT_OFFSET (field))
> >>                      % BITS_PER_UNIT))
> >>                return NULL_TREE;
> >> @@ -904,6 +922,9 @@ get_addr_base_and_unit_offset_1 (tree exp, poly_int64_pod *poffset,
> >>   done:
> >>
> >>     *poffset = byte_offset;
> >> +
> >> +  if (of_next_component)
> >> +    return next_field;
> >>     return exp;
> >>   }
> >>
> >> @@ -913,9 +934,11 @@ done:
> >>      is not BITS_PER_UNIT-aligned.  */
> >>
> >>   tree
> >> -get_addr_base_and_unit_offset (tree exp, poly_int64_pod *poffset)
> >> +get_addr_base_and_unit_offset (tree exp, poly_int64_pod *poffset, bool
> >> +                              of_next_component /* = false.  */)
> >>   {
> >> -  return get_addr_base_and_unit_offset_1 (exp, poffset, NULL);
> >> +  return get_addr_base_and_unit_offset_1 (exp, poffset, NULL,
> >> +                                         of_next_component);
> >>   }
> >>
> >>   /* Returns true if STMT references an SSA_NAME that has
> >> diff --git a/gcc/tree-dfa.h b/gcc/tree-dfa.h
> >> index b1457ab065c..94e44d9c3f6 100644
> >> --- a/gcc/tree-dfa.h
> >> +++ b/gcc/tree-dfa.h
> >> @@ -34,8 +34,10 @@ extern tree get_ref_base_and_extent (tree, poly_int64_pod *, poly_int64_pod *,
> >>   extern tree get_ref_base_and_extent_hwi (tree, HOST_WIDE_INT *,
> >>                                           HOST_WIDE_INT *, bool *);
> >>   extern tree get_addr_base_and_unit_offset_1 (tree, poly_int64_pod *,
> >> -                                            tree (*) (tree));
> >> -extern tree get_addr_base_and_unit_offset (tree, poly_int64_pod *);
> >> +                                            tree (*) (tree),
> >> +                                            bool of_next_component = false);
> >> +extern tree get_addr_base_and_unit_offset (tree, poly_int64_pod *,
> >> +                                          bool = false);
> >>   extern bool stmt_references_abnormal_ssa_name (gimple *);
> >>   extern void replace_abnormal_ssa_names (gimple *);
> >>   extern void dump_enumerated_decls (FILE *, dump_flags_t);
> >> diff --git a/gcc/tree.c b/gcc/tree.c
> >> index 1aa6e557a04..45d7fa2ae92 100644
> >> --- a/gcc/tree.c
> >> +++ b/gcc/tree.c
> >> @@ -12649,6 +12649,47 @@ get_initializer_for (tree init, tree decl)
> >>     return NULL_TREE;
> >>   }
> >>
> >> +static int
> >> +least_common_record_1 (tree basetype, tree field1, tree field2,
> >> +                      tree *least_basetype)
> >> +{
> >> +  int ret = 0;
> >> +
> >> +  for (tree fld = TYPE_FIELDS (basetype); fld; fld = TREE_CHAIN (fld))
> >> +    {
> >> +      if (fld == field1 || fld == field2)
> >> +       ret++;
> >> +
> >> +      if (TREE_CODE (TREE_TYPE (fld)) == UNION_TYPE
> >> +         || TREE_CODE (TREE_TYPE (fld)) == RECORD_TYPE)
> >> +       ret += least_common_record_1 (TREE_TYPE (fld), field1, field2,
> >> +                                     least_basetype);
> >> +    }
> >> +
> >> +  if (ret == 2)
> >> +    {
> >> +      *least_basetype = basetype;
> >> +      ret++; /* Avoid getting in this block again if a common basetype were
> >> +               found.  */
> >> +    }
> >> +
> >> +  return ret;
> >> +}
> >> +
> >> +/* Find the least common RECORD type common to two FIELDS from base.  */
> >> +static tree
> >> +least_common_record (tree basetype, tree field1, tree field2)
> >> +{
> >> +  if (!(TREE_CODE (basetype) == RECORD_TYPE
> >> +       || TREE_CODE (basetype) == UNION_TYPE))
> >> +    return NULL_TREE;
> >> +
> >> +  tree least_basetype = NULL_TREE;
> >> +  least_common_record_1 (basetype, field1, field2, &least_basetype);
> >> +
> >> +  return least_basetype;
> >> +}
> >> +
> >>   /* Determines the size of the member referenced by the COMPONENT_REF
> >>      REF, using its initializer expression if necessary in order to
> >>      determine the size of an initialized flexible array member.
> >> @@ -12768,28 +12809,30 @@ component_ref_size (tree ref, special_array_member *sam /* = NULL */)
> >>     memsize = NULL_TREE;
> >>
> >>     if (typematch)
> >> -    /* MEMBER is a true flexible array member.  Compute its size from
> >> -       the initializer of the BASE object if it has one.  */
> >> -    if (tree init = DECL_P (base) ? DECL_INITIAL (base) : NULL_TREE)
> >> -      if (init != error_mark_node)
> >> -       {
> >> -         init = get_initializer_for (init, member);
> >> -         if (init)
> >> -           {
> >> -             memsize = TYPE_SIZE_UNIT (TREE_TYPE (init));
> >> -             if (tree refsize = TYPE_SIZE_UNIT (argtype))
> >> -               {
> >> -                 /* Use the larger of the initializer size and the tail
> >> -                    padding in the enclosing struct.  */
> >> -                 poly_int64 rsz = tree_to_poly_int64 (refsize);
> >> -                 rsz -= baseoff;
> >> -                 if (known_lt (tree_to_poly_int64 (memsize), rsz))
> >> -                   memsize = wide_int_to_tree (TREE_TYPE (memsize), rsz);
> >> -               }
> >> -
> >> -             baseoff = 0;
> >> -           }
> >> -       }
> >> +    {
> >> +      /* MEMBER is a true flexible array member.  Compute its size from
> >> +        the initializer of the BASE object if it has one.  */
> >> +      if (tree init = DECL_P (base) ? DECL_INITIAL (base) : NULL_TREE)
> >> +       if (init != error_mark_node)
> >> +         {
> >> +           init = get_initializer_for (init, member);
> >> +           if (init)
> >> +             {
> >> +               memsize = TYPE_SIZE_UNIT (TREE_TYPE (init));
> >> +               if (tree refsize = TYPE_SIZE_UNIT (argtype))
> >> +                 {
> >> +                   /* Use the larger of the initializer size and the tail
> >> +                      padding in the enclosing struct.  */
> >> +                   poly_int64 rsz = tree_to_poly_int64 (refsize);
> >> +                   rsz -= baseoff;
> >> +                   if (known_lt (tree_to_poly_int64 (memsize), rsz))
> >> +                     memsize = wide_int_to_tree (TREE_TYPE (memsize), rsz);
> >> +                 }
> >> +
> >> +               baseoff = 0;
> >> +             }
> >> +         }
> >> +    }
> >>
> >>     if (!memsize)
> >>       {
> >> @@ -12810,9 +12853,31 @@ component_ref_size (tree ref, special_array_member *sam /* = NULL */)
> >>            memsize = TYPE_SIZE_UNIT (bt);
> >>          }
> >>         else if (DECL_P (base))
> >> -       /* Use the size of the BASE object (possibly an array of some
> >> -          other type such as char used to store the struct).  */
> >> -       memsize = DECL_SIZE_UNIT (base);
> >> +       {
> >> +         /* It is possible that the flexible array has no constructor at all.
> >> +            In such cases, GCC will allocate the array in some weird way.
> >> +            Assume that the array size is the difference between the start
> >> +            address of the array and the next component, if it exists.  */
> >> +
> >> +         tree lcr;
> >> +
> >> +         poly_int64 baseoff2 = 0;
> >> +         tree next_field = get_addr_base_and_unit_offset (ref, &baseoff2,
> >> +                                                     /* next_elmnt =  */ true);
> >> +         if (next_field
> >> +             && (lcr = least_common_record (basetype, member, next_field))
> >> +             && TREE_CODE (lcr) == RECORD_TYPE
> >> +             && known_ge (baseoff2, baseoff))
> >> +           {
> >> +               poly_int64 size =  baseoff2 - baseoff;
> >> +               memsize = wide_int_to_tree (TREE_TYPE (DECL_SIZE_UNIT (base)),
> >> +                                           size);
> >> +           }
> >> +         else
> >> +           /* Use the size of the BASE object (possibly an array of some
> >> +              other type such as char used to store the struct).  */
> >> +           memsize = DECL_SIZE_UNIT (base);
> >> +       }
> >>         else
> >>          return NULL_TREE;
> >>       }
> >> --
> >> 2.32.0
> >>
>
Martin Sebor June 16, 2021, 12:22 a.m. UTC | #7
On 6/15/21 12:23 AM, Richard Biener wrote:
> On Mon, Jun 14, 2021 at 7:32 PM Martin Sebor <msebor@gmail.com> wrote:
>>
>> On 6/14/21 12:36 AM, Richard Biener via Gcc-patches wrote:
>>> On Sun, Jun 13, 2021 at 7:00 PM Giuliano Belinassi via Gcc-patches
>>> <gcc-patches@gcc.gnu.org> wrote:
>>>>
>>>> This patch proposes a fix to PR 100944 by improving the array boundary
>>>> computation when the flexible array has no clear constructor: if no
>>>> constructor were found in the input code, we compute the size of the
>>>> array as:
>>>>
>>>>     offset(array begin) - offset(next element in RECORD_TYPE)
>>>>
>>>> If no next element is found in the RECORD, simply fall back to:
>>>>
>>>>     offset(array begin) - offset(RECORD_TYPE ends)
>>>>
>>>> We avoid doing this calculation if the RECORD_TYPE is actually an
>>>> UNION_TYPE, once things get very complicated in this case.
>>>
>>> I'm looking at Wzero-length-array-bounds.c and wonder why the patch
>>> makes a difference here.
>>>
>>> struct X { int a[0]; int b, c; };
>>>
>>> a is clearly not a flex-array?
>>
>> The warning issued here shouldn't change.  -Wzero-length-bounds is
>> documented to
>>
>>     Warn about Accesses to elements of zero-length array members that
>>     might overlap other members of the same object.
>>
>> By "the object" the text means the complete object (if one can be
>> determined), not necessarily just the subobject of the enclosing
>> struct.  This is contrast to accesses to zero-length arrays that
>> are out of the bounds of the complete object.  This distinction
>> was made to help the kernel transition to the new warning (I'm
>> not sure if they're done with the cleanup yet).
>>
>>>
>>> The code in component_ref_size is truly weird.  IMHO using
>>> get_ref_base_and_unit_offset for computing the size of a flexarray
>>> member is the wrong utility - instead a much better choice
>>> (also for consistency) would be array_at_struct_end_p itself,
>>> for example by giving it an optional HOST_WIDE_INT *size
>>> argument.
>>
>> array_at_struct_end_p() is permissive and treats all trailing arrays
>> as effectively unbounded if it doesn't know the enclosing object.
>> -Warray-bounds and all other access warnings are more conservative
>> and treat only flexible array members and trailing arrays of zero
>> length as unbounded (except in complete objects where they consider
>> the initializer for the former).
> 
> Sure, but that distinction is easy to fix for the consumer by looking at
> the array field in case array_at_struct_end_p returns true.  What I'm
> saying is that array_at_struct_end_p already performs analysis
> whether the array is the last field in the object and it already performs
> analysis of the object size - meaning it computes the possible extent
> of the array (and it matches that up with the array declaration).  It could
> simply communicate the length [bound] to the caller.
> 
> And if looking at an initializer is really necessary (IMHO that smells like
> the FE issue that fails to adjust DECL_SIZE of the object from it?) then
> even array_at_struct_end_p could be improved by looking at it by
> passing the (optional) initializer to the function.

The two functions serve different purposes but having them share
the same implementation seems like a fine idea as long as
component_ref_size still returns the same amount of detail about
the member.

I don't have much of an opinion on DECL_SIZE reflecting the size
of the whole object, including the flexible array member and its
initializer.  It seems to make sense to me, but it's been that
way for ages so I wouldn't expect changing it to be a slam dunk.

Martin

> 
> Richard.
> 
>> Martin
>>
>>>
>>> Richard.
>>>
>>>> gcc/ChangeLog:
>>>> 2021-13-08  Giuliano Belinassi  <giuliano.belinassi@usp.br>
>>>>
>>>>           PR middle-end/100944
>>>>           * tree-dfa.c (get_ref_base_and_unit_offset): Add option to compute size
>>>>           of next field.
>>>>           * (get_ref_base_and_unit_offset_1): Same as above.
>>>>           * tree-dfa.h (get_ref_base_and_unit_offset): Same as above.
>>>>           (get_ref_base_and_unit_offset_1): Same as above.
>>>>           * tree.c (least_common_record_1): New.
>>>>           (least_common_record): New.
>>>>           (component_ref_size): Improve array size calculation.
>>>>
>>>> gcc/testsuite/ChangeLog:
>>>> 2021-13-08  Giuliano Belinassi  <giuliano.belinassi@usp.br>
>>>>
>>>>           PR middle-end/100944
>>>>           * gcc.dg/Wzero-length-array-bounds.c: Update diagnostic.
>>>>           * gcc.dg/Warray-bounds-71.c: New test.
>>>> ---
>>>>    gcc/testsuite/gcc.dg/Warray-bounds-71.c       |  42 +++++++
>>>>    .../gcc.dg/Wzero-length-array-bounds.c        |  18 +--
>>>>    gcc/tree-dfa.c                                |  31 ++++-
>>>>    gcc/tree-dfa.h                                |   6 +-
>>>>    gcc/tree.c                                    | 115 ++++++++++++++----
>>>>    5 files changed, 172 insertions(+), 40 deletions(-)
>>>>    create mode 100644 gcc/testsuite/gcc.dg/Warray-bounds-71.c
>>>>
>>>> diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-71.c b/gcc/testsuite/gcc.dg/Warray-bounds-71.c
>>>> new file mode 100644
>>>> index 00000000000..cc5b083bc77
>>>> --- /dev/null
>>>> +++ b/gcc/testsuite/gcc.dg/Warray-bounds-71.c
>>>> @@ -0,0 +1,42 @@
>>>> +/* PR middle-end/100944 - missing -Warray-bounds accessing a flexible array
>>>> +   member of a nested struct
>>>> +   { dg-do compile }
>>>> +   { dg-options "-O2 -Wall" } */
>>>> +
>>>> +struct A0
>>>> +{
>>>> +  int i, a[0];
>>>> +};
>>>> +
>>>> +struct B0
>>>> +{
>>>> +  struct A0 a;
>>>> +  long x;
>>>> +} b0;
>>>> +
>>>> +void f0 (int i)
>>>> +{
>>>> +  long t = b0.x;
>>>> +  b0.a.a[i] = 0;    // { dg-warning "\\\[-Warray-bounds" }
>>>> +  if (t != b0.x)    // folded to false
>>>> +    __builtin_abort ();
>>>> +}
>>>> +
>>>> +struct Ax
>>>> +{
>>>> +  int i, a[];
>>>> +};
>>>> +
>>>> +struct Bx
>>>> +{
>>>> +  struct Ax a;
>>>> +  long x;
>>>> +} bx;
>>>> +
>>>> +void fx (int i)
>>>> +{
>>>> +  long t = bx.x;
>>>> +  bx.a.a[i] = 0;    // { dg-warning "\\\[-Warray-bounds" }
>>>> +  if (t != bx.x)    // folded to false
>>>> +    __builtin_abort ();
>>>> +}
>>>> diff --git a/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c b/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c
>>>> index 8e880d92dea..117b30ff294 100644
>>>> --- a/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c
>>>> +++ b/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c
>>>> @@ -68,21 +68,21 @@ extern struct Y y;
>>>>
>>>>    void access_to_member (int i)
>>>>    {
>>>> -  y.a[0].a[0] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
>>>> -  y.a[0].a[1] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
>>>> -  y.a[0].a[2] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
>>>> +  y.a[0].a[0] = 0;      // { dg-warning "\\\[-Warray-bounds" }
>>>> +  y.a[0].a[1] = 0;      // { dg-warning "\\\[-Warray-bounds" }
>>>> +  y.a[0].a[2] = 0;      // { dg-warning "\\\[-Warray-bounds" }
>>>>      sink (a);
>>>>
>>>> -  y.a[1].a[0] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
>>>> -  y.a[1].a[1] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
>>>> +  y.a[1].a[0] = 0;      // { dg-warning "\\\[-Warray-bounds" }
>>>> +  y.a[1].a[1] = 0;      // { dg-warning "\\\[-Warray-bounds" }
>>>>      /* Similar to the array case above, accesses to a subsequent member
>>>>         of the "parent" struct seem like a more severe problem than those
>>>>         to the next member of the same struct.  */
>>>> -  y.a[1].a[2] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
>>>> +  y.a[1].a[2] = 0;      // { dg-warning "\\\[-Warray-bounds" }
>>>>      sink (a);
>>>>
>>>> -  y.b.a[0] = 0;         // { dg-warning "\\\[-Wzero-length-bounds" }
>>>> -  y.b.a[1] = 0;         // { dg-warning "\\\[-Wzero-length-bounds" }
>>>> -  y.b.a[2] = 0;         // { dg-warning "\\\[-Wzero-length-bounds" }
>>>> +  y.b.a[0] = 0;         // { dg-warning "\\\[-Warray-bounds" }
>>>> +  y.b.a[1] = 0;         // { dg-warning "\\\[-Warray-bounds" }
>>>> +  y.b.a[2] = 0;         // { dg-warning "\\\[-Warray-bounds" }
>>>>      y.b.a[3] = 0;         // { dg-warning "\\\[-Warray-bounds" }
>>>>    }
>>>> diff --git a/gcc/tree-dfa.c b/gcc/tree-dfa.c
>>>> index 1d20de0c400..dc3c15f11d5 100644
>>>> --- a/gcc/tree-dfa.c
>>>> +++ b/gcc/tree-dfa.c
>>>> @@ -772,9 +772,11 @@ get_ref_base_and_extent_hwi (tree exp, HOST_WIDE_INT *poffset,
>>>>
>>>>    tree
>>>>    get_addr_base_and_unit_offset_1 (tree exp, poly_int64_pod *poffset,
>>>> -                                tree (*valueize) (tree))
>>>> +                                tree (*valueize) (tree),
>>>> +                                bool of_next_component /* = false.  */)
>>>>    {
>>>>      poly_int64 byte_offset = 0;
>>>> +  tree next_field = NULL_TREE;
>>>>
>>>>      /* Compute cumulative byte-offset for nested component-refs and array-refs,
>>>>         and find the ultimate containing object.  */
>>>> @@ -797,11 +799,27 @@ get_addr_base_and_unit_offset_1 (tree exp, poly_int64_pod *poffset,
>>>>           case COMPONENT_REF:
>>>>             {
>>>>               tree field = TREE_OPERAND (exp, 1);
>>>> -           tree this_offset = component_ref_field_offset (exp);
>>>> +           tree this_offset;
>>>> +
>>>>               poly_int64 hthis_offset;
>>>>
>>>> +           if (of_next_component && !next_field)
>>>> +             {
>>>> +               /* We are looking for the next component of the record.  */
>>>> +               next_field = TREE_CHAIN (field);
>>>> +               if (!next_field)
>>>> +                 break;
>>>> +
>>>> +               /* We found a next component.  Flag that we found it and
>>>> +                  update the target field.  */
>>>> +               field = next_field;
>>>> +             }
>>>> +
>>>> +           this_offset = component_ref_field_offset (exp);
>>>> +
>>>>               if (!this_offset
>>>>                   || !poly_int_tree_p (this_offset, &hthis_offset)
>>>> +               || TREE_CODE (field) != FIELD_DECL
>>>>                   || (TREE_INT_CST_LOW (DECL_FIELD_BIT_OFFSET (field))
>>>>                       % BITS_PER_UNIT))
>>>>                 return NULL_TREE;
>>>> @@ -904,6 +922,9 @@ get_addr_base_and_unit_offset_1 (tree exp, poly_int64_pod *poffset,
>>>>    done:
>>>>
>>>>      *poffset = byte_offset;
>>>> +
>>>> +  if (of_next_component)
>>>> +    return next_field;
>>>>      return exp;
>>>>    }
>>>>
>>>> @@ -913,9 +934,11 @@ done:
>>>>       is not BITS_PER_UNIT-aligned.  */
>>>>
>>>>    tree
>>>> -get_addr_base_and_unit_offset (tree exp, poly_int64_pod *poffset)
>>>> +get_addr_base_and_unit_offset (tree exp, poly_int64_pod *poffset, bool
>>>> +                              of_next_component /* = false.  */)
>>>>    {
>>>> -  return get_addr_base_and_unit_offset_1 (exp, poffset, NULL);
>>>> +  return get_addr_base_and_unit_offset_1 (exp, poffset, NULL,
>>>> +                                         of_next_component);
>>>>    }
>>>>
>>>>    /* Returns true if STMT references an SSA_NAME that has
>>>> diff --git a/gcc/tree-dfa.h b/gcc/tree-dfa.h
>>>> index b1457ab065c..94e44d9c3f6 100644
>>>> --- a/gcc/tree-dfa.h
>>>> +++ b/gcc/tree-dfa.h
>>>> @@ -34,8 +34,10 @@ extern tree get_ref_base_and_extent (tree, poly_int64_pod *, poly_int64_pod *,
>>>>    extern tree get_ref_base_and_extent_hwi (tree, HOST_WIDE_INT *,
>>>>                                            HOST_WIDE_INT *, bool *);
>>>>    extern tree get_addr_base_and_unit_offset_1 (tree, poly_int64_pod *,
>>>> -                                            tree (*) (tree));
>>>> -extern tree get_addr_base_and_unit_offset (tree, poly_int64_pod *);
>>>> +                                            tree (*) (tree),
>>>> +                                            bool of_next_component = false);
>>>> +extern tree get_addr_base_and_unit_offset (tree, poly_int64_pod *,
>>>> +                                          bool = false);
>>>>    extern bool stmt_references_abnormal_ssa_name (gimple *);
>>>>    extern void replace_abnormal_ssa_names (gimple *);
>>>>    extern void dump_enumerated_decls (FILE *, dump_flags_t);
>>>> diff --git a/gcc/tree.c b/gcc/tree.c
>>>> index 1aa6e557a04..45d7fa2ae92 100644
>>>> --- a/gcc/tree.c
>>>> +++ b/gcc/tree.c
>>>> @@ -12649,6 +12649,47 @@ get_initializer_for (tree init, tree decl)
>>>>      return NULL_TREE;
>>>>    }
>>>>
>>>> +static int
>>>> +least_common_record_1 (tree basetype, tree field1, tree field2,
>>>> +                      tree *least_basetype)
>>>> +{
>>>> +  int ret = 0;
>>>> +
>>>> +  for (tree fld = TYPE_FIELDS (basetype); fld; fld = TREE_CHAIN (fld))
>>>> +    {
>>>> +      if (fld == field1 || fld == field2)
>>>> +       ret++;
>>>> +
>>>> +      if (TREE_CODE (TREE_TYPE (fld)) == UNION_TYPE
>>>> +         || TREE_CODE (TREE_TYPE (fld)) == RECORD_TYPE)
>>>> +       ret += least_common_record_1 (TREE_TYPE (fld), field1, field2,
>>>> +                                     least_basetype);
>>>> +    }
>>>> +
>>>> +  if (ret == 2)
>>>> +    {
>>>> +      *least_basetype = basetype;
>>>> +      ret++; /* Avoid getting in this block again if a common basetype were
>>>> +               found.  */
>>>> +    }
>>>> +
>>>> +  return ret;
>>>> +}
>>>> +
>>>> +/* Find the least common RECORD type common to two FIELDS from base.  */
>>>> +static tree
>>>> +least_common_record (tree basetype, tree field1, tree field2)
>>>> +{
>>>> +  if (!(TREE_CODE (basetype) == RECORD_TYPE
>>>> +       || TREE_CODE (basetype) == UNION_TYPE))
>>>> +    return NULL_TREE;
>>>> +
>>>> +  tree least_basetype = NULL_TREE;
>>>> +  least_common_record_1 (basetype, field1, field2, &least_basetype);
>>>> +
>>>> +  return least_basetype;
>>>> +}
>>>> +
>>>>    /* Determines the size of the member referenced by the COMPONENT_REF
>>>>       REF, using its initializer expression if necessary in order to
>>>>       determine the size of an initialized flexible array member.
>>>> @@ -12768,28 +12809,30 @@ component_ref_size (tree ref, special_array_member *sam /* = NULL */)
>>>>      memsize = NULL_TREE;
>>>>
>>>>      if (typematch)
>>>> -    /* MEMBER is a true flexible array member.  Compute its size from
>>>> -       the initializer of the BASE object if it has one.  */
>>>> -    if (tree init = DECL_P (base) ? DECL_INITIAL (base) : NULL_TREE)
>>>> -      if (init != error_mark_node)
>>>> -       {
>>>> -         init = get_initializer_for (init, member);
>>>> -         if (init)
>>>> -           {
>>>> -             memsize = TYPE_SIZE_UNIT (TREE_TYPE (init));
>>>> -             if (tree refsize = TYPE_SIZE_UNIT (argtype))
>>>> -               {
>>>> -                 /* Use the larger of the initializer size and the tail
>>>> -                    padding in the enclosing struct.  */
>>>> -                 poly_int64 rsz = tree_to_poly_int64 (refsize);
>>>> -                 rsz -= baseoff;
>>>> -                 if (known_lt (tree_to_poly_int64 (memsize), rsz))
>>>> -                   memsize = wide_int_to_tree (TREE_TYPE (memsize), rsz);
>>>> -               }
>>>> -
>>>> -             baseoff = 0;
>>>> -           }
>>>> -       }
>>>> +    {
>>>> +      /* MEMBER is a true flexible array member.  Compute its size from
>>>> +        the initializer of the BASE object if it has one.  */
>>>> +      if (tree init = DECL_P (base) ? DECL_INITIAL (base) : NULL_TREE)
>>>> +       if (init != error_mark_node)
>>>> +         {
>>>> +           init = get_initializer_for (init, member);
>>>> +           if (init)
>>>> +             {
>>>> +               memsize = TYPE_SIZE_UNIT (TREE_TYPE (init));
>>>> +               if (tree refsize = TYPE_SIZE_UNIT (argtype))
>>>> +                 {
>>>> +                   /* Use the larger of the initializer size and the tail
>>>> +                      padding in the enclosing struct.  */
>>>> +                   poly_int64 rsz = tree_to_poly_int64 (refsize);
>>>> +                   rsz -= baseoff;
>>>> +                   if (known_lt (tree_to_poly_int64 (memsize), rsz))
>>>> +                     memsize = wide_int_to_tree (TREE_TYPE (memsize), rsz);
>>>> +                 }
>>>> +
>>>> +               baseoff = 0;
>>>> +             }
>>>> +         }
>>>> +    }
>>>>
>>>>      if (!memsize)
>>>>        {
>>>> @@ -12810,9 +12853,31 @@ component_ref_size (tree ref, special_array_member *sam /* = NULL */)
>>>>             memsize = TYPE_SIZE_UNIT (bt);
>>>>           }
>>>>          else if (DECL_P (base))
>>>> -       /* Use the size of the BASE object (possibly an array of some
>>>> -          other type such as char used to store the struct).  */
>>>> -       memsize = DECL_SIZE_UNIT (base);
>>>> +       {
>>>> +         /* It is possible that the flexible array has no constructor at all.
>>>> +            In such cases, GCC will allocate the array in some weird way.
>>>> +            Assume that the array size is the difference between the start
>>>> +            address of the array and the next component, if it exists.  */
>>>> +
>>>> +         tree lcr;
>>>> +
>>>> +         poly_int64 baseoff2 = 0;
>>>> +         tree next_field = get_addr_base_and_unit_offset (ref, &baseoff2,
>>>> +                                                     /* next_elmnt =  */ true);
>>>> +         if (next_field
>>>> +             && (lcr = least_common_record (basetype, member, next_field))
>>>> +             && TREE_CODE (lcr) == RECORD_TYPE
>>>> +             && known_ge (baseoff2, baseoff))
>>>> +           {
>>>> +               poly_int64 size =  baseoff2 - baseoff;
>>>> +               memsize = wide_int_to_tree (TREE_TYPE (DECL_SIZE_UNIT (base)),
>>>> +                                           size);
>>>> +           }
>>>> +         else
>>>> +           /* Use the size of the BASE object (possibly an array of some
>>>> +              other type such as char used to store the struct).  */
>>>> +           memsize = DECL_SIZE_UNIT (base);
>>>> +       }
>>>>          else
>>>>           return NULL_TREE;
>>>>        }
>>>> --
>>>> 2.32.0
>>>>
>>
diff mbox series

Patch

diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-71.c b/gcc/testsuite/gcc.dg/Warray-bounds-71.c
new file mode 100644
index 00000000000..cc5b083bc77
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/Warray-bounds-71.c
@@ -0,0 +1,42 @@ 
+/* PR middle-end/100944 - missing -Warray-bounds accessing a flexible array
+   member of a nested struct
+   { dg-do compile }
+   { dg-options "-O2 -Wall" } */
+
+struct A0
+{
+  int i, a[0];
+};
+
+struct B0
+{
+  struct A0 a;
+  long x;
+} b0;
+
+void f0 (int i)
+{
+  long t = b0.x;
+  b0.a.a[i] = 0;    // { dg-warning "\\\[-Warray-bounds" }
+  if (t != b0.x)    // folded to false
+    __builtin_abort ();
+}
+
+struct Ax
+{
+  int i, a[];
+};
+
+struct Bx
+{
+  struct Ax a;
+  long x;
+} bx;
+
+void fx (int i)
+{
+  long t = bx.x;
+  bx.a.a[i] = 0;    // { dg-warning "\\\[-Warray-bounds" }
+  if (t != bx.x)    // folded to false
+    __builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c b/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c
index 8e880d92dea..117b30ff294 100644
--- a/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c
+++ b/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c
@@ -68,21 +68,21 @@  extern struct Y y;
 
 void access_to_member (int i)
 {
-  y.a[0].a[0] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
-  y.a[0].a[1] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
-  y.a[0].a[2] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
+  y.a[0].a[0] = 0;      // { dg-warning "\\\[-Warray-bounds" }
+  y.a[0].a[1] = 0;      // { dg-warning "\\\[-Warray-bounds" }
+  y.a[0].a[2] = 0;      // { dg-warning "\\\[-Warray-bounds" }
   sink (a);
 
-  y.a[1].a[0] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
-  y.a[1].a[1] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
+  y.a[1].a[0] = 0;      // { dg-warning "\\\[-Warray-bounds" }
+  y.a[1].a[1] = 0;      // { dg-warning "\\\[-Warray-bounds" }
   /* Similar to the array case above, accesses to a subsequent member
      of the "parent" struct seem like a more severe problem than those
      to the next member of the same struct.  */
-  y.a[1].a[2] = 0;      // { dg-warning "\\\[-Wzero-length-bounds" }
+  y.a[1].a[2] = 0;      // { dg-warning "\\\[-Warray-bounds" }
   sink (a);
 
-  y.b.a[0] = 0;         // { dg-warning "\\\[-Wzero-length-bounds" }
-  y.b.a[1] = 0;         // { dg-warning "\\\[-Wzero-length-bounds" }
-  y.b.a[2] = 0;         // { dg-warning "\\\[-Wzero-length-bounds" }
+  y.b.a[0] = 0;         // { dg-warning "\\\[-Warray-bounds" }
+  y.b.a[1] = 0;         // { dg-warning "\\\[-Warray-bounds" }
+  y.b.a[2] = 0;         // { dg-warning "\\\[-Warray-bounds" }
   y.b.a[3] = 0;         // { dg-warning "\\\[-Warray-bounds" }
 }
diff --git a/gcc/tree-dfa.c b/gcc/tree-dfa.c
index 1d20de0c400..dc3c15f11d5 100644
--- a/gcc/tree-dfa.c
+++ b/gcc/tree-dfa.c
@@ -772,9 +772,11 @@  get_ref_base_and_extent_hwi (tree exp, HOST_WIDE_INT *poffset,
 
 tree
 get_addr_base_and_unit_offset_1 (tree exp, poly_int64_pod *poffset,
-				 tree (*valueize) (tree))
+				 tree (*valueize) (tree),
+				 bool of_next_component /* = false.  */)
 {
   poly_int64 byte_offset = 0;
+  tree next_field = NULL_TREE;
 
   /* Compute cumulative byte-offset for nested component-refs and array-refs,
      and find the ultimate containing object.  */
@@ -797,11 +799,27 @@  get_addr_base_and_unit_offset_1 (tree exp, poly_int64_pod *poffset,
 	case COMPONENT_REF:
 	  {
 	    tree field = TREE_OPERAND (exp, 1);
-	    tree this_offset = component_ref_field_offset (exp);
+	    tree this_offset;
+
 	    poly_int64 hthis_offset;
 
+	    if (of_next_component && !next_field)
+	      {
+		/* We are looking for the next component of the record.  */
+		next_field = TREE_CHAIN (field);
+		if (!next_field)
+		  break;
+
+		/* We found a next component.  Flag that we found it and
+		   update the target field.  */
+		field = next_field;
+	      }
+
+	    this_offset = component_ref_field_offset (exp);
+
 	    if (!this_offset
 		|| !poly_int_tree_p (this_offset, &hthis_offset)
+		|| TREE_CODE (field) != FIELD_DECL
 		|| (TREE_INT_CST_LOW (DECL_FIELD_BIT_OFFSET (field))
 		    % BITS_PER_UNIT))
 	      return NULL_TREE;
@@ -904,6 +922,9 @@  get_addr_base_and_unit_offset_1 (tree exp, poly_int64_pod *poffset,
 done:
 
   *poffset = byte_offset;
+
+  if (of_next_component)
+    return next_field;
   return exp;
 }
 
@@ -913,9 +934,11 @@  done:
    is not BITS_PER_UNIT-aligned.  */
 
 tree
-get_addr_base_and_unit_offset (tree exp, poly_int64_pod *poffset)
+get_addr_base_and_unit_offset (tree exp, poly_int64_pod *poffset, bool
+			       of_next_component /* = false.  */)
 {
-  return get_addr_base_and_unit_offset_1 (exp, poffset, NULL);
+  return get_addr_base_and_unit_offset_1 (exp, poffset, NULL,
+					  of_next_component);
 }
 
 /* Returns true if STMT references an SSA_NAME that has
diff --git a/gcc/tree-dfa.h b/gcc/tree-dfa.h
index b1457ab065c..94e44d9c3f6 100644
--- a/gcc/tree-dfa.h
+++ b/gcc/tree-dfa.h
@@ -34,8 +34,10 @@  extern tree get_ref_base_and_extent (tree, poly_int64_pod *, poly_int64_pod *,
 extern tree get_ref_base_and_extent_hwi (tree, HOST_WIDE_INT *,
 					 HOST_WIDE_INT *, bool *);
 extern tree get_addr_base_and_unit_offset_1 (tree, poly_int64_pod *,
-					     tree (*) (tree));
-extern tree get_addr_base_and_unit_offset (tree, poly_int64_pod *);
+					     tree (*) (tree),
+					     bool of_next_component = false);
+extern tree get_addr_base_and_unit_offset (tree, poly_int64_pod *,
+					   bool = false);
 extern bool stmt_references_abnormal_ssa_name (gimple *);
 extern void replace_abnormal_ssa_names (gimple *);
 extern void dump_enumerated_decls (FILE *, dump_flags_t);
diff --git a/gcc/tree.c b/gcc/tree.c
index 1aa6e557a04..45d7fa2ae92 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -12649,6 +12649,47 @@  get_initializer_for (tree init, tree decl)
   return NULL_TREE;
 }
 
+static int
+least_common_record_1 (tree basetype, tree field1, tree field2,
+		       tree *least_basetype)
+{
+  int ret = 0;
+
+  for (tree fld = TYPE_FIELDS (basetype); fld; fld = TREE_CHAIN (fld))
+    {
+      if (fld == field1 || fld == field2)
+	ret++;
+
+      if (TREE_CODE (TREE_TYPE (fld)) == UNION_TYPE
+	  || TREE_CODE (TREE_TYPE (fld)) == RECORD_TYPE)
+	ret += least_common_record_1 (TREE_TYPE (fld), field1, field2,
+				      least_basetype);
+    }
+
+  if (ret == 2)
+    {
+      *least_basetype = basetype;
+      ret++; /* Avoid getting in this block again if a common basetype were
+		found.  */
+    }
+
+  return ret;
+}
+
+/* Find the least common RECORD type common to two FIELDS from base.  */
+static tree
+least_common_record (tree basetype, tree field1, tree field2)
+{
+  if (!(TREE_CODE (basetype) == RECORD_TYPE
+	|| TREE_CODE (basetype) == UNION_TYPE))
+    return NULL_TREE;
+
+  tree least_basetype = NULL_TREE;
+  least_common_record_1 (basetype, field1, field2, &least_basetype);
+
+  return least_basetype;
+}
+
 /* Determines the size of the member referenced by the COMPONENT_REF
    REF, using its initializer expression if necessary in order to
    determine the size of an initialized flexible array member.
@@ -12768,28 +12809,30 @@  component_ref_size (tree ref, special_array_member *sam /* = NULL */)
   memsize = NULL_TREE;
 
   if (typematch)
-    /* MEMBER is a true flexible array member.  Compute its size from
-       the initializer of the BASE object if it has one.  */
-    if (tree init = DECL_P (base) ? DECL_INITIAL (base) : NULL_TREE)
-      if (init != error_mark_node)
-	{
-	  init = get_initializer_for (init, member);
-	  if (init)
-	    {
-	      memsize = TYPE_SIZE_UNIT (TREE_TYPE (init));
-	      if (tree refsize = TYPE_SIZE_UNIT (argtype))
-		{
-		  /* Use the larger of the initializer size and the tail
-		     padding in the enclosing struct.  */
-		  poly_int64 rsz = tree_to_poly_int64 (refsize);
-		  rsz -= baseoff;
-		  if (known_lt (tree_to_poly_int64 (memsize), rsz))
-		    memsize = wide_int_to_tree (TREE_TYPE (memsize), rsz);
-		}
-
-	      baseoff = 0;
-	    }
-	}
+    {
+      /* MEMBER is a true flexible array member.  Compute its size from
+	 the initializer of the BASE object if it has one.  */
+      if (tree init = DECL_P (base) ? DECL_INITIAL (base) : NULL_TREE)
+	if (init != error_mark_node)
+	  {
+	    init = get_initializer_for (init, member);
+	    if (init)
+	      {
+		memsize = TYPE_SIZE_UNIT (TREE_TYPE (init));
+		if (tree refsize = TYPE_SIZE_UNIT (argtype))
+		  {
+		    /* Use the larger of the initializer size and the tail
+		       padding in the enclosing struct.  */
+		    poly_int64 rsz = tree_to_poly_int64 (refsize);
+		    rsz -= baseoff;
+		    if (known_lt (tree_to_poly_int64 (memsize), rsz))
+		      memsize = wide_int_to_tree (TREE_TYPE (memsize), rsz);
+		  }
+
+		baseoff = 0;
+	      }
+	  }
+    }
 
   if (!memsize)
     {
@@ -12810,9 +12853,31 @@  component_ref_size (tree ref, special_array_member *sam /* = NULL */)
 	  memsize = TYPE_SIZE_UNIT (bt);
 	}
       else if (DECL_P (base))
-	/* Use the size of the BASE object (possibly an array of some
-	   other type such as char used to store the struct).  */
-	memsize = DECL_SIZE_UNIT (base);
+	{
+	  /* It is possible that the flexible array has no constructor at all.
+	     In such cases, GCC will allocate the array in some weird way.
+	     Assume that the array size is the difference between the start
+	     address of the array and the next component, if it exists.  */
+
+	  tree lcr;
+
+	  poly_int64 baseoff2 = 0;
+	  tree next_field = get_addr_base_and_unit_offset (ref, &baseoff2,
+						      /* next_elmnt =  */ true);
+	  if (next_field
+	      && (lcr = least_common_record (basetype, member, next_field))
+	      && TREE_CODE (lcr) == RECORD_TYPE
+	      && known_ge (baseoff2, baseoff))
+	    {
+		poly_int64 size =  baseoff2 - baseoff;
+		memsize = wide_int_to_tree (TREE_TYPE (DECL_SIZE_UNIT (base)),
+					    size);
+	    }
+	  else
+	    /* Use the size of the BASE object (possibly an array of some
+	       other type such as char used to store the struct).  */
+	    memsize = DECL_SIZE_UNIT (base);
+	}
       else
 	return NULL_TREE;
     }