diff mbox

Add a new type attribute always_alias (PR79671)

Message ID alpine.LSU.2.20.1704060925130.30715@zhemvz.fhfr.qr
State New
Headers show

Commit Message

Richard Biener April 6, 2017, 7:47 a.m. UTC
On Wed, 5 Apr 2017, Bernd Edlinger wrote:

> On 04/05/17 19:22, Bernd Edlinger wrote:
> > On 04/05/17 18:08, Jakub Jelinek wrote:
> >
> > Yes, exactly.  I really want to reach the deadline for gcc-7.
> > Fixing the name is certainly the most important first step,
> > and if everybody agrees on "typeless_storage", for the name
> > I can start with adjusting the name, and look into how
> > to use a spare type-flag that should be a mechanical change.
> >
> 
> Jakub, I just renamed the attribute and reworked the patch
> as you suggested, reg-testing is not yet completed, but
> it looks good so far.  I also added a few more tests.
> 
> I have changed the documentation as Richi suggested, but
> I am not too sure what to say here.

The alias.c changes are not sufficient.  I think what you want is
sth like


not needed (but also not sufficient - you need to handle arrays of
byte somewhere).

@@ -1491,6 +1491,7 @@ struct GTY(()) tree_type_common {
   unsigned needs_constructing_flag : 1;
   unsigned transparent_aggr_flag : 1;
   unsigned restrict_flag : 1;
+  unsigned typeless_storage_flag : 1;
   unsigned contains_placeholder_bits : 2;

   ENUM_BITFIELD(machine_mode) mode : 8;

bits are grouped in groups of 8 bits, this breaks it.

@@ -8041,7 +8041,8 @@ build_pointer_type_for_mode (tree to_type, machine

   /* If the pointed-to type has the may_alias attribute set, force
      a TYPE_REF_CAN_ALIAS_ALL pointer to be generated.  */
-  if (lookup_attribute ("may_alias", TYPE_ATTRIBUTES (to_type)))
+  if (TYPE_TYPELESS_STORAGE (to_type)
+      || lookup_attribute ("may_alias", TYPE_ATTRIBUTES (to_type)))
     can_alias_all = true;

   /* In some cases, languages will have things that aren't a POINTER_TYPE
@@ -8110,7 +8111,8 @@ build_reference_type_for_mode (tree to_type, machi

   /* If the pointed-to type has the may_alias attribute set, force
      a TYPE_REF_CAN_ALIAS_ALL pointer to be generated.  */
-  if (lookup_attribute ("may_alias", TYPE_ATTRIBUTES (to_type)))
+  if (TYPE_TYPELESS_STORAGE (to_type)
+      || lookup_attribute ("may_alias", TYPE_ATTRIBUTES (to_type)))
     can_alias_all = true;

   /* In some cases, languages will have things that aren't a

not needed.

+/* Nonzero if the type should behave like a character type
+   with respect to aliasing semantics.  */
+#define TYPE_TYPELESS_STORAGE(NODE) \
+  (TYPE_CHECK (NODE)->type_common.typeless_storage_flag)

ARRAY_TYPE_CHECK (NODE)->

Richard.

Comments

Jakub Jelinek April 6, 2017, 7:51 a.m. UTC | #1
On Thu, Apr 06, 2017 at 09:47:10AM +0200, Richard Biener wrote:
> @@ -955,6 +960,7 @@ get_alias_set (tree t)
>       Just be pragmatic here and make sure the array and its element
>       type get the same alias set assigned.  */
>    else if (TREE_CODE (t) == ARRAY_TYPE
> +	   && ! TYPE_TYPELESS_STORAGE (t)
>  	   && (!TYPE_NONALIASED_COMPONENT (t)
>  	       || TYPE_STRUCTURAL_EQUALITY_P (t)))
>      set = get_alias_set (TREE_TYPE (t));
> @@ -1094,6 +1100,15 @@ get_alias_set (tree t)
>  
>    TYPE_ALIAS_SET (t) = set;
>  
> +  if (TREE_CODE (t) == ARRAY_TYPE
> +      && TYPE_TYPELESS_STORAGE (t))

Shouldn't TYPE_TYPELESS_STORAGE apply even for non-array types?
If somebody chooses to store anything in long long
__attribute__((typeless_storage)), so be it.  Or what kind of complications
do you see for that?

	Jakub
Richard Biener April 6, 2017, 7:55 a.m. UTC | #2
On Thu, 6 Apr 2017, Jakub Jelinek wrote:

> On Thu, Apr 06, 2017 at 09:47:10AM +0200, Richard Biener wrote:
> > @@ -955,6 +960,7 @@ get_alias_set (tree t)
> >       Just be pragmatic here and make sure the array and its element
> >       type get the same alias set assigned.  */
> >    else if (TREE_CODE (t) == ARRAY_TYPE
> > +	   && ! TYPE_TYPELESS_STORAGE (t)
> >  	   && (!TYPE_NONALIASED_COMPONENT (t)
> >  	       || TYPE_STRUCTURAL_EQUALITY_P (t)))
> >      set = get_alias_set (TREE_TYPE (t));
> > @@ -1094,6 +1100,15 @@ get_alias_set (tree t)
> >  
> >    TYPE_ALIAS_SET (t) = set;
> >  
> > +  if (TREE_CODE (t) == ARRAY_TYPE
> > +      && TYPE_TYPELESS_STORAGE (t))
> 
> Shouldn't TYPE_TYPELESS_STORAGE apply even for non-array types?
> If somebody chooses to store anything in long long
> __attribute__((typeless_storage)), so be it.  Or what kind of complications
> do you see for that?

It's a new feature so I don't see why we should allow that.  Given that
people will have to do sth when the compiler doesn't support it the
only "reliable" way of using it is on an array of char anyway.

The complication starts when people use it on a type that currently
uses alias-set zero (because "zero" doesn't get an alias_set_entry).

Richard.
Bernd Edlinger April 6, 2017, 2:11 p.m. UTC | #3
On 04/06/17 09:55, Richard Biener wrote:
> On Thu, 6 Apr 2017, Jakub Jelinek wrote:
>
>> On Thu, Apr 06, 2017 at 09:47:10AM +0200, Richard Biener wrote:
>>> @@ -955,6 +960,7 @@ get_alias_set (tree t)
>>>       Just be pragmatic here and make sure the array and its element
>>>       type get the same alias set assigned.  */
>>>    else if (TREE_CODE (t) == ARRAY_TYPE
>>> +	   && ! TYPE_TYPELESS_STORAGE (t)
>>>  	   && (!TYPE_NONALIASED_COMPONENT (t)
>>>  	       || TYPE_STRUCTURAL_EQUALITY_P (t)))
>>>      set = get_alias_set (TREE_TYPE (t));
>>> @@ -1094,6 +1100,15 @@ get_alias_set (tree t)
>>>
>>>    TYPE_ALIAS_SET (t) = set;
>>>
>>> +  if (TREE_CODE (t) == ARRAY_TYPE
>>> +      && TYPE_TYPELESS_STORAGE (t))
>>
>> Shouldn't TYPE_TYPELESS_STORAGE apply even for non-array types?
>> If somebody chooses to store anything in long long
>> __attribute__((typeless_storage)), so be it.  Or what kind of complications
>> do you see for that?
>
> It's a new feature so I don't see why we should allow that.  Given that
> people will have to do sth when the compiler doesn't support it the
> only "reliable" way of using it is on an array of char anyway.
>
> The complication starts when people use it on a type that currently
> uses alias-set zero (because "zero" doesn't get an alias_set_entry).
>

The typeless_storage does not need to implement all the C++ semantic
by itself.  It would be possible, but then it is not as generic as
it could be.  What I'd really like to have is make an arbitrary
type behave as if it were a char with respect to aliasing.

In my mind, the typeless_storage attribute has a value of its own,
but it can be used to implement the C++17 semantic of std::byte [N].

So I would not want to completely change the way TBAA is working
today.  I believe it is doing a fairly good job.

The TBAA machinery does not, for instance, need to propagate this
attribute from the member to the enclosing struct; that is
also not done for a struct that contains a char.

I think it is not too complicated to do in the C++ FE.
The FE looks for array of std::byte and unsigned char,
and sets the attribute when the final type is constructed.

What I am trying to do is just extend the semantic of may_alias
a bit, and then have the C++ FE use it in the way it has to.

Here is what I want to write in the doc:

@item typeless_storage
@cindex @code{typeless_storage} type attribute
A type declared with this attribute behaves like a character type
with respect to aliasing semantics.
This attribute is similar to the @code{may_alias} attribute,
except that it is not restricted to pointers.

Example of use:

@smallexample
typedef int __attribute__((__typeless_storage__)) int_a;

int
main (void)
@{
   int_a a = 0x12345678;
   short *b = (short *) &a;

   b[1] = 0;

   if (a == 0x12345678)
     abort();

   exit(0);
@}
@end smallexample


we should first agree on that.

Bernd.
Florian Weimer April 6, 2017, 2:17 p.m. UTC | #4
On 04/06/2017 04:11 PM, Bernd Edlinger wrote:

> I think it is not too complicated to done in the C++ FE.
> The FE looks for array of std::byte and unsigned char,
> and sets the attribute when the final type is constructed.
>
> What I am trying to do is just extend the semantic of may_alias
> a bit, and then have the C++ FE use it in the way it has to.

We also need this for some POSIX and Linux kernel interfaces.  A 
C++-only solution would not help with that.

> Here is what I want to write in the doc:
>
> @item typeless_storage
> @cindex @code{typeless_storage} type attribute
> A type declared with this attribute behaves like a character type
> with respect to aliasing semantics.
> This is attribute is similar to the @code{may_alias} attribute,
> except that it is not restricted to pointers.

As Jakub pointed out, this is not what we need here.  An object of type 
char does *not* have untyped storage.  Accessing it as a different type 
is still undefined.

The documentation says that the memory region is considered to be 
untyped, like a memory region returned by malloc (but obviously not with 
the implication that the memory region is separated from everything else).

Thanks,
Florian
Richard Biener April 6, 2017, 2:22 p.m. UTC | #5
On Thu, 6 Apr 2017, Bernd Edlinger wrote:

> On 04/06/17 09:55, Richard Biener wrote:
> > On Thu, 6 Apr 2017, Jakub Jelinek wrote:
> >
> >> On Thu, Apr 06, 2017 at 09:47:10AM +0200, Richard Biener wrote:
> >>> @@ -955,6 +960,7 @@ get_alias_set (tree t)
> >>>       Just be pragmatic here and make sure the array and its element
> >>>       type get the same alias set assigned.  */
> >>>    else if (TREE_CODE (t) == ARRAY_TYPE
> >>> +	   && ! TYPE_TYPELESS_STORAGE (t)
> >>>  	   && (!TYPE_NONALIASED_COMPONENT (t)
> >>>  	       || TYPE_STRUCTURAL_EQUALITY_P (t)))
> >>>      set = get_alias_set (TREE_TYPE (t));
> >>> @@ -1094,6 +1100,15 @@ get_alias_set (tree t)
> >>>
> >>>    TYPE_ALIAS_SET (t) = set;
> >>>
> >>> +  if (TREE_CODE (t) == ARRAY_TYPE
> >>> +      && TYPE_TYPELESS_STORAGE (t))
> >>
> >> Shouldn't TYPE_TYPELESS_STORAGE apply even for non-array types?
> >> If somebody chooses to store anything in long long
> >> __attribute__((typeless_storage)), so be it.  Or what kind of complications
> >> do you see for that?
> >
> > It's a new feature so I don't see why we should allow that.  Given that
> > people will have to do sth when the compiler doesn't support it the
> > only "reliable" way of using it is on an array of char anyway.
> >
> > The complication starts when people use it on a type that currently
> > uses alias-set zero (because "zero" doesn't get an alias_set_entry).
> >
> 
> The typeless_storage does not need to implement all the C++ semantic
> by itself.  It would be possible, but then it is not as generic as
> it could be.  What I'd really like to have is make an arbitrary
> type behave as if it were a char with respect to aliasing.
> 
> In my mind, the typeless_storage attribute has a value of its own,
> but it can be used to implement the C++17 semantic of std::byte [N].
> 
> So I would not want to completely change the way TBAA is working
> today.  I believe it is doing a fairly good job.
> 
> The TBAA machinery, does for instance not need to propagate this
> attribute from the member to the enclosing struct that is
> also not done for a struct that contains a char.
> 
> I think it is not too complicated to done in the C++ FE.
> The FE looks for array of std::byte and unsigned char,
> and sets the attribute when the final type is constructed.
> 
> What I am trying to do is just extend the semantic of may_alias
> a bit, and then have the C++ FE use it in the way it has to.
> 
> Here is what I want to write in the doc:
> 
> @item typeless_storage
> @cindex @code{typeless_storage} type attribute
> A type declared with this attribute behaves like a character type
> with respect to aliasing semantics.
> This is attribute is similar to the @code{may_alias} attribute,
> except that it is not restricted to pointers.
> 
> Example of use:
> 
> @smallexample
> typedef int __attribute__((__typeless_storage__)) int_a;
> 
> int
> main (void)
> @{
>    int_a a = 0x12345678;
>    short *b = (short *) &a;
> 
>    b[1] = 0;
> 
>    if (a == 0x12345678)
>      abort();
> 
>    exit(0);
> @}
> @end smallexample
> 
> 
> we should first agree on that.

I don't see anyone needing the above example, it's not going to be
portable in any way.

Please don't invent sth that invites users to write bad code.
I'd even restrict it to work on arrays of chars only!
(arrays of byte-size integer types)

Richard.
Richard Biener April 6, 2017, 2:23 p.m. UTC | #6
On Thu, 6 Apr 2017, Florian Weimer wrote:

> On 04/06/2017 04:11 PM, Bernd Edlinger wrote:
> 
> > I think it is not too complicated to done in the C++ FE.
> > The FE looks for array of std::byte and unsigned char,
> > and sets the attribute when the final type is constructed.
> > 
> > What I am trying to do is just extend the semantic of may_alias
> > a bit, and then have the C++ FE use it in the way it has to.
> 
> We also need this for some POSIX and Linux kernel interfaces.  A C++-only
> solution would not help with that.

Example(s)?

> > Here is what I want to write in the doc:
> > 
> > @item typeless_storage
> > @cindex @code{typeless_storage} type attribute
> > A type declared with this attribute behaves like a character type
> > with respect to aliasing semantics.
> > This is attribute is similar to the @code{may_alias} attribute,
> > except that it is not restricted to pointers.
> 
> As Jakub pointed out, this is not what we need here.  An object of type char
> does *not* have untyped storage.  Accessing it as a different type is still
> undefined.
> 
> The documentation says that the memory region is considered to by untyped,
> like a memory region returned by malloc (but obviously not with the
> implication that the memory region is separated from everything else).
> 
> Thanks,
> Florian
> 
>
Jonathan Wakely April 6, 2017, 2:43 p.m. UTC | #7
On 06/04/17 16:23 +0200, Richard Biener wrote:
>On Thu, 6 Apr 2017, Florian Weimer wrote:
>
>> On 04/06/2017 04:11 PM, Bernd Edlinger wrote:
>>
>> > I think it is not too complicated to done in the C++ FE.
>> > The FE looks for array of std::byte and unsigned char,
>> > and sets the attribute when the final type is constructed.
>> >
>> > What I am trying to do is just extend the semantic of may_alias
>> > a bit, and then have the C++ FE use it in the way it has to.
>>
>> We also need this for some POSIX and Linux kernel interfaces.  A C++-only
>> solution would not help with that.
>
>Example(s)?

sockaddr_storage comes to mind.
Florian Weimer April 6, 2017, 2:51 p.m. UTC | #8
On 04/06/2017 04:43 PM, Jonathan Wakely wrote:
> On 06/04/17 16:23 +0200, Richard Biener wrote:
>> On Thu, 6 Apr 2017, Florian Weimer wrote:
>>
>>> On 04/06/2017 04:11 PM, Bernd Edlinger wrote:
>>>
>>> > I think it is not too complicated to done in the C++ FE.
>>> > The FE looks for array of std::byte and unsigned char,
>>> > and sets the attribute when the final type is constructed.
>>> >
>>> > What I am trying to do is just extend the semantic of may_alias
>>> > a bit, and then have the C++ FE use it in the way it has to.
>>>
>>> We also need this for some POSIX and Linux kernel interfaces.  A
>>> C++-only
>>> solution would not help with that.
>>
>> Example(s)?
>
> sockaddr_storage comes to mind.

Right.  The kernel also has many APIs which return multiple 
variable-length data blocks, such as getdents64, and many more 
interfaces in combination with read/recv system calls.  Variable length 
means that you cannot declare the appropriate type after the first data 
item, so you technically have to use malloc.

POSIX interfaces which exhibit a similar pattern are getpwnam_r and 
friends, but for them, you can probably use malloc without ill effect 
(although there are still performance concerns).

Thanks,
Florian
Jakub Jelinek April 6, 2017, 3:05 p.m. UTC | #9
On Thu, Apr 06, 2017 at 04:51:01PM +0200, Florian Weimer wrote:
> On 04/06/2017 04:43 PM, Jonathan Wakely wrote:
> > On 06/04/17 16:23 +0200, Richard Biener wrote:
> > > On Thu, 6 Apr 2017, Florian Weimer wrote:
> > > 
> > > > On 04/06/2017 04:11 PM, Bernd Edlinger wrote:
> > > > 
> > > > > I think it is not too complicated to done in the C++ FE.
> > > > > The FE looks for array of std::byte and unsigned char,
> > > > > and sets the attribute when the final type is constructed.
> > > > >
> > > > > What I am trying to do is just extend the semantic of may_alias
> > > > > a bit, and then have the C++ FE use it in the way it has to.
> > > > 
> > > > We also need this for some POSIX and Linux kernel interfaces.  A
> > > > C++-only
> > > > solution would not help with that.
> > > 
> > > Example(s)?
> > 
> > sockaddr_storage comes to mind.
> 
> Right.  The kernel also has many APIs which return multiple variable-length
> data blocks, such as getdents64, and many more interfaces in combination

The kernel uses -fno-strict-aliasing I think, so it doesn't care.

	Jakub
Florian Weimer April 6, 2017, 3:10 p.m. UTC | #10
On 04/06/2017 05:05 PM, Jakub Jelinek wrote:
> On Thu, Apr 06, 2017 at 04:51:01PM +0200, Florian Weimer wrote:
>> On 04/06/2017 04:43 PM, Jonathan Wakely wrote:
>>> On 06/04/17 16:23 +0200, Richard Biener wrote:
>>>> On Thu, 6 Apr 2017, Florian Weimer wrote:
>>>>
>>>>> On 04/06/2017 04:11 PM, Bernd Edlinger wrote:
>>>>>
>>>>>> I think it is not too complicated to done in the C++ FE.
>>>>>> The FE looks for array of std::byte and unsigned char,
>>>>>> and sets the attribute when the final type is constructed.
>>>>>>
>>>>>> What I am trying to do is just extend the semantic of may_alias
>>>>>> a bit, and then have the C++ FE use it in the way it has to.
>>>>>
>>>>> We also need this for some POSIX and Linux kernel interfaces.  A
>>>>> C++-only
>>>>> solution would not help with that.
>>>>
>>>> Example(s)?
>>>
>>> sockaddr_storage comes to mind.
>>
>> Right.  The kernel also has many APIs which return multiple variable-length
>> data blocks, such as getdents64, and many more interfaces in combination
>
> The kernel uses -fno-strict-aliasing I think, so it doesn't care.

These APIs (getdents64, inotify, lots of netlink stuff, probably more) 
extend to user space.

Thanks,
Florian
Bernd Edlinger April 6, 2017, 5:39 p.m. UTC | #11
On 04/06/17 16:17, Florian Weimer wrote:
>> Here is what I want to write in the doc:
>>
>> @item typeless_storage
>> @cindex @code{typeless_storage} type attribute
>> A type declared with this attribute behaves like a character type
>> with respect to aliasing semantics.
>> This is attribute is similar to the @code{may_alias} attribute,
>> except that it is not restricted to pointers.
>
> As Jakub pointed out, this is not what we need here.  An object of type
> char does *not* have untyped storage.  Accessing it as a different type
> is still undefined.
>

but, do you agree that this is valid in C11?

typedef char char_a[4];

int
main (void)
{
   char_a a = {1,2,3,4};
   short *b = (short *) &a;

   b[1] = 0;

   if (a[0] == 1 && a[1] == 2 && a[2] == 3 && a[3] == 4)
     abort();

   exit(0);
}


all I want to do is replace "char" with a different type.

Bernd.

> The documentation says that the memory region is considered to by
> untyped, like a memory region returned by malloc (but obviously not with
> the implication that the memory region is separated from everything else).
>
> Thanks,
> Florian
Florian Weimer April 6, 2017, 5:47 p.m. UTC | #12
On 04/06/2017 07:39 PM, Bernd Edlinger wrote:
> On 04/06/17 16:17, Florian Weimer wrote:
>>> Here is what I want to write in the doc:
>>>
>>> @item typeless_storage
>>> @cindex @code{typeless_storage} type attribute
>>> A type declared with this attribute behaves like a character type
>>> with respect to aliasing semantics.
>>> This is attribute is similar to the @code{may_alias} attribute,
>>> except that it is not restricted to pointers.
>>
>> As Jakub pointed out, this is not what we need here.  An object of type
>> char does *not* have untyped storage.  Accessing it as a different type
>> is still undefined.
>>
>
> but, do you agree that this is valid in C11?
>
> typedef char char_a[4];
>
> int
> main (void)
> {
>    char_a a = {1,2,3,4};
>    short *b = (short *) &a;
>
>    b[1] = 0;
>
>    if (a[0] == 1 && a[1] == 2 && a[2] == 3 && a[3] == 4)
>      abort();
>
>    exit(0);
> }
>
>
> all I want to do is replace "char" with a different type.

Thanks a lot for posting a concrete example.

The effective type of a[2] and a[3] is char.  The character type wildcard 
in 6.5(7) only applies to the type of the lvalue expression used for the 
access, not the effective type of the object being accessed.  The type 
of the LHS of the assignment expression is short.  So the access is 
undefined.

Florian
Bernd Edlinger April 6, 2017, 6:12 p.m. UTC | #13
On 04/06/17 19:47, Florian Weimer wrote:
> On 04/06/2017 07:39 PM, Bernd Edlinger wrote:
>> On 04/06/17 16:17, Florian Weimer wrote:
>>>> Here is what I want to write in the doc:
>>>>
>>>> @item typeless_storage
>>>> @cindex @code{typeless_storage} type attribute
>>>> A type declared with this attribute behaves like a character type
>>>> with respect to aliasing semantics.
>>>> This is attribute is similar to the @code{may_alias} attribute,
>>>> except that it is not restricted to pointers.
>>>
>>> As Jakub pointed out, this is not what we need here.  An object of type
>>> char does *not* have untyped storage.  Accessing it as a different type
>>> is still undefined.
>>>
>>
>> but, do you agree that this is valid in C11?
>>
>> typedef char char_a[4];
>>
>> int
>> main (void)
>> {
>>    char_a a = {1,2,3,4};
>>    short *b = (short *) &a;
>>
>>    b[1] = 0;
>>
>>    if (a[0] == 1 && a[1] == 2 && a[2] == 3 && a[3] == 4)
>>      abort();
>>
>>    exit(0);
>> }
>>
>>
>> all I want to do is replace "char" with a different type.
>
> Thanks a lot for posting a concrete example.
>
> The effective type of a[2] and [3] is char.  The character type wildcard
> in 6.5(7) only applies to the type of the lvalue expression ysed for the
> access, not the effective type of the object being accessed.  The type
> of the LHS of the assignment expression is short.  So the access is
> undefined.
>

exactly *that* is what I want to make valid with that attribute, which
would be also useful in C and kernel code, IMHO.

But isn't the effective type changed by the assignment b[1] = 0;
as described in 6.5(6):
"If a value is stored into an object having no declared type through an
lvalue having a type that is not a character type, then the type of the
lvalue becomes the effective type of the object for that access and for
subsequent accesses that do not modify the stored value."



Bernd.
Florian Weimer April 6, 2017, 6:19 p.m. UTC | #14
On 04/06/2017 08:12 PM, Bernd Edlinger wrote:
> On 04/06/17 19:47, Florian Weimer wrote:
>> On 04/06/2017 07:39 PM, Bernd Edlinger wrote:
>>> On 04/06/17 16:17, Florian Weimer wrote:
>>>>> Here is what I want to write in the doc:
>>>>>
>>>>> @item typeless_storage
>>>>> @cindex @code{typeless_storage} type attribute
>>>>> A type declared with this attribute behaves like a character type
>>>>> with respect to aliasing semantics.
>>>>> This is attribute is similar to the @code{may_alias} attribute,
>>>>> except that it is not restricted to pointers.
>>>>
>>>> As Jakub pointed out, this is not what we need here.  An object of type
>>>> char does *not* have untyped storage.  Accessing it as a different type
>>>> is still undefined.
>>>>
>>>
>>> but, do you agree that this is valid in C11?
>>>
>>> typedef char char_a[4];
>>>
>>> int
>>> main (void)
>>> {
>>>    char_a a = {1,2,3,4};
>>>    short *b = (short *) &a;
>>>
>>>    b[1] = 0;
>>>
>>>    if (a[0] == 1 && a[1] == 2 && a[2] == 3 && a[3] == 4)
>>>      abort();
>>>
>>>    exit(0);
>>> }
>>>
>>>
>>> all I want to do is replace "char" with a different type.
>>
>> Thanks a lot for posting a concrete example.
>>
>> The effective type of a[2] and [3] is char.  The character type wildcard
>> in 6.5(7) only applies to the type of the lvalue expression ysed for the
>> access, not the effective type of the object being accessed.  The type
>> of the LHS of the assignment expression is short.  So the access is
>> undefined.
>>
>
> exactly *that* is what I want to make valid with that attribute, which
> would be also useful in C and kernel code, IMHO.

And I think we all agree that this is a laudable goal.

> But isn't the effective type changed by the assignment b[1] = 0;
> as described in 6.5(6):
> "If a value is stored into an object having no declared type through an
> lvalue having a type that is not a character type, then the type of the
> lvalue becomes the effective type of the object for that access and for
> subsequent accesses that do not modify the stored value."

I don't know what your patch does, but your proposed documentation does 
not make this valid because “declared as char” is still not “having no 
declared type”.  Or put differently, “behaves like a character type” is 
not what we actually want here.

Let me repeat that I don't know if this is merely a documentation issue.

Thanks,
Florian
Bernd Edlinger April 6, 2017, 6:49 p.m. UTC | #15
On 04/06/17 20:19, Florian Weimer wrote:
>
> I don't know what your patch does, but your proposed documentation does
> not make this valid because “declared as char” is still not “having no
> declared type”.  Or put differently, “behaves like a character type” is
> not what we actually want here.
>

What the patch does is just so simple but it is hard for me to find the
right words so that really everybody understands:

Technically, we already have the may_alias attribute, that forces all
access through pointers to have "alias set 0" that in turn makes all
other objects volatile, unless the compiler can prove that the address
is in fact different.  But it has no impact on DECLs, so if you
use may_alias on a type, and you declare an object with that type,
then directly accessing that object by name does NOT have "alias set 0".

When I noticed that in the context of PR79671 I initially thought that
was by accident, but Richi pointed out that this is a useful feature for
vector types, that are always declared as may_alias, and moreover
the may_alias is / has been always documented to have only meaning
on pointers, all that changed is that the TBAA aliasing oracle has
improved recently to follow the specified behavior more closely.

My patch simply duplicates the semantic of may_alias and adds
"alias set 0" for accesses through DECLs of that type.


However, I must confess I find it difficult to understand the
language in which the ISO standard is written.

For instance how do you "declare an object without a declared type"?


> Let me repeat that I don't know if this is merely a documentation issue.
>
> Thanks,
> Florian
Florian Weimer April 6, 2017, 7:05 p.m. UTC | #16
On 04/06/2017 08:49 PM, Bernd Edlinger wrote:

> For instance how do you "declare an object without a declared type"?

malloc and other allocation functions return pointers to objects without 
a declared type.

Thanks,
Florian
Richard Biener April 6, 2017, 7:12 p.m. UTC | #17
On April 6, 2017 4:51:01 PM GMT+02:00, Florian Weimer <fweimer@redhat.com> wrote:
>On 04/06/2017 04:43 PM, Jonathan Wakely wrote:
>> On 06/04/17 16:23 +0200, Richard Biener wrote:
>>> On Thu, 6 Apr 2017, Florian Weimer wrote:
>>>
>>>> On 04/06/2017 04:11 PM, Bernd Edlinger wrote:
>>>>
>>>> > I think it is not too complicated to done in the C++ FE.
>>>> > The FE looks for array of std::byte and unsigned char,
>>>> > and sets the attribute when the final type is constructed.
>>>> >
>>>> > What I am trying to do is just extend the semantic of may_alias
>>>> > a bit, and then have the C++ FE use it in the way it has to.
>>>>
>>>> We also need this for some POSIX and Linux kernel interfaces.  A
>>>> C++-only
>>>> solution would not help with that.
>>>
>>> Example(s)?
>>
>> sockaddr_storage comes to mind.
>
>Right.  The kernel also has many APIs which return multiple 
>variable-length data blocks, such as getdents64, and many more 
>interfaces in combination with read/recv system calls.  Variable length
>
>means that you cannot declare the appropriate type after the first data
>
>item, so you technically have to use malloc.
>
>POSIX interfaces which exhibit a similar pattern are getpwnam_r and 
>friends, but for them, you can probably use malloc without ill effect 
>(although there are still performance concerns).

Can you give a concrete example which shows the issue and how typeless_storage helps?

Thanks,
Richard.

>Thanks,
>Florian
Richard Biener April 6, 2017, 7:14 p.m. UTC | #18
On April 6, 2017 7:39:14 PM GMT+02:00, Bernd Edlinger <bernd.edlinger@hotmail.de> wrote:
>On 04/06/17 16:17, Florian Weimer wrote:
>>> Here is what I want to write in the doc:
>>>
>>> @item typeless_storage
>>> @cindex @code{typeless_storage} type attribute
>>> A type declared with this attribute behaves like a character type
>>> with respect to aliasing semantics.
>>> This is attribute is similar to the @code{may_alias} attribute,
>>> except that it is not restricted to pointers.
>>
>> As Jakub pointed out, this is not what we need here.  An object of
>type
>> char does *not* have untyped storage.  Accessing it as a different
>type
>> is still undefined.
>>
>
>but, do you agree that this is valid in C11?
>
>typedef char char_a[4];
>
>int
>main (void)
>{
>   char_a a = {1,2,3,4};
>   short *b = (short *) &a;
>
>   b[1] = 0;
>
>   if (a[0] == 1 && a[1] == 2 && a[2] == 3 && a[3] == 4)
>     abort();
>
>   exit(0);
>}
>
>
>all I want to do is replace "char" with a different type.

Why?

Richard.

>Bernd.
>
>> The documentation says that the memory region is considered to by
>> untyped, like a memory region returned by malloc (but obviously not
>with
>> the implication that the memory region is separated from everything
>else).
>>
>> Thanks,
>> Florian
Richard Biener April 6, 2017, 7:16 p.m. UTC | #19
On April 6, 2017 8:12:29 PM GMT+02:00, Bernd Edlinger <bernd.edlinger@hotmail.de> wrote:
>On 04/06/17 19:47, Florian Weimer wrote:
>> On 04/06/2017 07:39 PM, Bernd Edlinger wrote:
>>> On 04/06/17 16:17, Florian Weimer wrote:
>>>>> Here is what I want to write in the doc:
>>>>>
>>>>> @item typeless_storage
>>>>> @cindex @code{typeless_storage} type attribute
>>>>> A type declared with this attribute behaves like a character type
>>>>> with respect to aliasing semantics.
>>>>> This is attribute is similar to the @code{may_alias} attribute,
>>>>> except that it is not restricted to pointers.
>>>>
>>>> As Jakub pointed out, this is not what we need here.  An object of
>type
>>>> char does *not* have untyped storage.  Accessing it as a different
>type
>>>> is still undefined.
>>>>
>>>
>>> but, do you agree that this is valid in C11?
>>>
>>> typedef char char_a[4];
>>>
>>> int
>>> main (void)
>>> {
>>>    char_a a = {1,2,3,4};
>>>    short *b = (short *) &a;
>>>
>>>    b[1] = 0;
>>>
>>>    if (a[0] == 1 && a[1] == 2 && a[2] == 3 && a[3] == 4)
>>>      abort();
>>>
>>>    exit(0);
>>> }
>>>
>>>
>>> all I want to do is replace "char" with a different type.
>>
>> Thanks a lot for posting a concrete example.
>>
>> The effective type of a[2] and [3] is char.  The character type
>wildcard
>> in 6.5(7) only applies to the type of the lvalue expression ysed for
>the
>> access, not the effective type of the object being accessed.  The
>type
>> of the LHS of the assignment expression is short.  So the access is
>> undefined.
>>
>
>exactly *that* is what I want to make valid with that attribute, which
>would be also useful in C and kernel code, IMHO.
>
>But isn't the effective type changed by the assignment b[1] = 0;
>as described in 6.5(6):
>"If a value is stored into an object having no declared type through an
>lvalue having a type that is not a character type, then the type of the
>lvalue becomes the effective type of the object for that access and for
>subsequent accesses that do not modify the stored value."

Yes.  I think the example is valid.  At least GCCs memory model makes it so.

Richard.

>
>
>Bernd.
Bernd Edlinger April 6, 2017, 7:20 p.m. UTC | #20
On 04/06/17 21:05, Florian Weimer wrote:
> On 04/06/2017 08:49 PM, Bernd Edlinger wrote:
>
>> For instance how do you "declare an object without a declared type"?
>
> malloc and other allocation functions return pointers to objects without
> a declared type.
>

Thanks Florian,

this discussion is very helpful.

How about this for the documentation:

@item typeless_storage
@cindex @code{typeless_storage} type attribute
In the context of section 6.5 paragraph 6 of the C11 standard,
an object of this type behaves as if it has no declared type.
In the context of section 6.5 paragraph 7 of the C11 standard,
an object or a pointer of this type behaves as if it were a
character type.
This attribute is similar to the @code{may_alias} attribute,
except that it is not restricted to pointers.

Example of use:

@smallexample
typedef int __attribute__((__typeless_storage__)) int_a;

int
main (void)
@{
   int_a a = 0x12345678;
   short *b = (short *) &a;

   b[1] = 0;

   if (a == 0x12345678)
     abort();

   exit(0);
@}
@end smallexample


Bernd.
Bernd Edlinger April 6, 2017, 7:51 p.m. UTC | #21
On 04/06/17 21:14, Richard Biener wrote:
> On April 6, 2017 7:39:14 PM GMT+02:00, Bernd Edlinger <bernd.edlinger@hotmail.de> wrote:

>> On 04/06/17 16:17, Florian Weimer wrote:

>>>> Here is what I want to write in the doc:

>>>>

>>>> @item typeless_storage

>>>> @cindex @code{typeless_storage} type attribute

>>>> A type declared with this attribute behaves like a character type

>>>> with respect to aliasing semantics.

>>>> This is attribute is similar to the @code{may_alias} attribute,

>>>> except that it is not restricted to pointers.

>>>

>>> As Jakub pointed out, this is not what we need here.  An object of

>> type

>>> char does *not* have untyped storage.  Accessing it as a different

>> type

>>> is still undefined.

>>>

>>

>> but, do you agree that this is valid in C11?

>>

>> typedef char char_a[4];

>>

>> int

>> main (void)

>> {

>>   char_a a = {1,2,3,4};

>>   short *b = (short *) &a;

>>

>>   b[1] = 0;

>>

>>   if (a[0] == 1 && a[1] == 2 && a[2] == 3 && a[3] == 4)

>>     abort();

>>

>>   exit(0);

>> }

>>

>>

>> all I want to do is replace "char" with a different type.

>

> Why?


- It feels more orthogonal this way.
- Otherwise malloc would have magic power, in creating objects with no
   declared type.
- And implementing something like malloc in plain C would actually be
   forbidden, which is ridiculous, because I already have done it.
- It was easy to implement in the middle end.
- It feels useful in C and C++.
- Jason says :)


Bernd.

>

> Richard.
Bernd Edlinger April 6, 2017, 9 p.m. UTC | #22
On 04/06/17 09:47, Richard Biener wrote:
> On Wed, 5 Apr 2017, Bernd Edlinger wrote:
>
>> On 04/05/17 19:22, Bernd Edlinger wrote:
>>> On 04/05/17 18:08, Jakub Jelinek wrote:
>>>
>>> Yes, exactly.  I really want to reach the deadline for gcc-7.
>>> Fixing the name is certainly the most important first step,
>>> and if everybody agrees on "typeless_storage", for the name
>>> I can start with adjusting the name, and look into how
>>> to use a spare type-flag that should be a mechanical change.
>>>
>>
>> Jakub, I just renamed the attribute and reworked the patch
>> as you suggested, reg-testing is not yet completed, but
>> it looks good so far.  I also added a few more tests.
>>
>> I have changed the documentation as Richi suggested, but
>> I am not too sure what to say here.
>
> The alias.c changes are not sufficient.  I think what you want is
> sth like
>
> Index: gcc/alias.c
> ===================================================================
> --- gcc/alias.c	(revision 246678)
> +++ gcc/alias.c	(working copy)
> @@ -136,6 +136,9 @@ struct GTY(()) alias_set_entry {
>    bool is_pointer;
>    /* Nonzero if is_pointer or if one of childs have has_pointer set.  */
>    bool has_pointer;
> +  /* Nonzero if we have a child serving as typeless storage (or are
> +     such storage ourselves).  */
> +  bool has_typeless_storage;
>
>    /* The children of the alias set.  These are not just the immediate
>       children, but, in fact, all descendants.  So, if we have:
> @@ -419,7 +422,8 @@ alias_set_subset_of (alias_set_type set1
>    /* Check if set1 is a subset of set2.  */
>    ase2 = get_alias_set_entry (set2);
>    if (ase2 != 0
> -      && (ase2->has_zero_child
> +      && (ase2->has_typeless_storage
> +	  || ase2->has_zero_child
>  	  || (ase2->children && ase2->children->get (set1))))
>      return true;
>

I think get_alias_set(t) will return 0 for typeless_storage
types, and therefore has_zero_child will be set anyway.
I think both mean the same thing in the end, but it depends on
what typeless_storage should actually mean, and we have
not yet the same idea about it.

> @@ -825,6 +829,7 @@ init_alias_set_entry (alias_set_type set
>    ase->has_zero_child = false;
>    ase->is_pointer = false;
>    ase->has_pointer = false;
> +  ase->has_typeless_storage = false;
>    gcc_checking_assert (!get_alias_set_entry (set));
>    (*alias_sets)[set] = ase;
>    return ase;
> @@ -955,6 +960,7 @@ get_alias_set (tree t)
>       Just be pragmatic here and make sure the array and its element
>       type get the same alias set assigned.  */
>    else if (TREE_CODE (t) == ARRAY_TYPE
> +	   && ! TYPE_TYPELESS_STORAGE (t)
>  	   && (!TYPE_NONALIASED_COMPONENT (t)
>  	       || TYPE_STRUCTURAL_EQUALITY_P (t)))
>      set = get_alias_set (TREE_TYPE (t));
> @@ -1094,6 +1100,15 @@ get_alias_set (tree t)
>
>    TYPE_ALIAS_SET (t) = set;
>
> +  if (TREE_CODE (t) == ARRAY_TYPE
> +      && TYPE_TYPELESS_STORAGE (t))
> +    {
> +      alias_set_entry *ase = get_alias_set_entry (set);
> +      if (!ase)
> +	ase = init_alias_set_entry (set);
> +      ase->has_typeless_storage = true;
> +    }
> +
>    /* If this is an aggregate type or a complex type, we must record any
>       component aliasing information.  */
>    if (AGGREGATE_TYPE_P (t) || TREE_CODE (t) == COMPLEX_TYPE)
> @@ -1173,6 +1188,8 @@ record_alias_subset (alias_set_type supe
>  	    superset_entry->has_zero_child = true;
>            if (subset_entry->has_pointer)
>  	    superset_entry->has_pointer = true;
> +	  if (subset_entry->has_typeless_storage)
> +	    superset_entry->has_typeless_storage = true;
>
>  	  if (subset_entry->children)
>  	    {
>
>
> please also restrict TYPE_TYPELESS_STORAGE to ARRAY_TYPEs (otherwise
> more complications will arise).
>
> Index: gcc/cp/class.c
> ===================================================================
> --- gcc/cp/class.c      (revision 246678)
> +++ gcc/cp/class.c      (working copy)
> @@ -2083,7 +2083,8 @@ fixup_attribute_variants (tree t)
>    tree attrs = TYPE_ATTRIBUTES (t);
>    unsigned align = TYPE_ALIGN (t);
>    bool user_align = TYPE_USER_ALIGN (t);
> -  bool may_alias = lookup_attribute ("may_alias", attrs);
> +  bool may_alias = TYPE_TYPELESS_STORAGE (t)
> +                  || lookup_attribute ("may_alias", attrs);
>
>    if (may_alias)
>      fixup_may_alias (t);
> @@ -7345,6 +7348,12 @@ finish_struct_1 (tree t)
>       the class or perform any other required target modifications.  */
>    targetm.cxx.adjust_class_at_definition (t);
>
> +  if (cxx_dialect >= cxx1z && cxx_type_contains_byte_buffer (t))
> +    {
> +      TYPE_TYPELESS_STORAGE (t) = 1;
> +      fixup_attribute_variants (t);
> ...
>
> I don't think you need all this given alias.c only looks at
> TYPE_MAIN_VARIANTs.

I wanted to be able to declare a int __attribute__((typeless_storage))
as in the test case, and the sample in the spec.  And that
information is not in the TYPE_MAIN_VARIANT.  Therefore I look for
typeless_storage before "t = TYPE_MAIN_VARIANT (t)".

>
> Index: gcc/cp/decl.c
> ===================================================================
> --- gcc/cp/decl.c       (revision 246678)
> +++ gcc/cp/decl.c       (working copy)
> @@ -14081,10 +14081,11 @@ start_enum (tree name, tree enumtype, tree
> underly
>           enumtype = pushtag (name, enumtype, /*tag_scope=*/ts_current);
>
>           /* std::byte aliases anything.  */
> -         if (enumtype != error_mark_node
> +         if (cxx_dialect >= cxx1z
> +             && enumtype != error_mark_node
>               && TYPE_CONTEXT (enumtype) == std_node
>               && !strcmp ("byte", TYPE_NAME_STRING (enumtype)))
> -           TYPE_ALIAS_SET (enumtype) = 0;
> +           TYPE_TYPELESS_STORAGE (enumtype) = 1;
>         }
>        else
>
> not needed (but also not sufficient - you need to handle arrays of
> byte somewhere).

See cxx_type_contains_byte_buffer: this function looks recursively into
structures and unions, and returns the information if the beast
contains an array of unsigned char or std::byte.

>
> @@ -1491,6 +1491,7 @@ struct GTY(()) tree_type_common {
>    unsigned needs_constructing_flag : 1;
>    unsigned transparent_aggr_flag : 1;
>    unsigned restrict_flag : 1;
> +  unsigned typeless_storage_flag : 1;
>    unsigned contains_placeholder_bits : 2;
>
>    ENUM_BITFIELD(machine_mode) mode : 8;
>
> bits are grouped in groups of 8 bits, this breaks it.
>

Oh..., does this explain the problems that I had with this version???

> @@ -8041,7 +8041,8 @@ build_pointer_type_for_mode (tree to_type, machine
>
>    /* If the pointed-to type has the may_alias attribute set, force
>       a TYPE_REF_CAN_ALIAS_ALL pointer to be generated.  */
> -  if (lookup_attribute ("may_alias", TYPE_ATTRIBUTES (to_type)))
> +  if (TYPE_TYPELESS_STORAGE (to_type)
> +      || lookup_attribute ("may_alias", TYPE_ATTRIBUTES (to_type)))
>      can_alias_all = true;
>
>    /* In some cases, languages will have things that aren't a POINTER_TYPE
> @@ -8110,7 +8111,8 @@ build_reference_type_for_mode (tree to_type, machi
>
>    /* If the pointed-to type has the may_alias attribute set, force
>       a TYPE_REF_CAN_ALIAS_ALL pointer to be generated.  */
> -  if (lookup_attribute ("may_alias", TYPE_ATTRIBUTES (to_type)))
> +  if (TYPE_TYPELESS_STORAGE (to_type)
> +      || lookup_attribute ("may_alias", TYPE_ATTRIBUTES (to_type)))
>      can_alias_all = true;
>
>    /* In some cases, languages will have things that aren't a
>
> not needed.
>

You mean, because the get_alias_set (to_type) will be 0 anyway,
and can_alias_all won't change the semantics?


Bernd.

> +/* Nonzero if the type should behave like a character type
> +   with respect to aliasing sementics.  */
> +#define TYPE_TYPELESS_STORAGE(NODE) \
> +  (TYPE_CHECK (NODE)->type_common.typeless_storage_flag)
>
> ARRAY_TYPE_CHECK (NODE)->
>
> Richard.
>
Richard Biener April 7, 2017, 6:47 a.m. UTC | #23
On Thu, 6 Apr 2017, Bernd Edlinger wrote:

> On 04/06/17 21:05, Florian Weimer wrote:
> > On 04/06/2017 08:49 PM, Bernd Edlinger wrote:
> >
> >> For instance how do you "declare an object without a declared type"?
> >
> > malloc and other allocation functions return pointers to objects without
> > a declared type.
> >
> 
> Thanks Florian,
> 
> this discussion is very helpful.
> 
> How about this for the documentation:
> 
> @item typeless_storage
> @cindex @code{typeless_storage} type attribute
> In the context of section 6.5 paragraph 6 of the C11 standard,
> an object of this type behaves as if it has no declared type.
> In the context of section 6.5 paragraph 7 of the C11 standard,
> an object or a pointer if this type behaves as if it were a
> character type.
> This is attribute is similar to the @code{may_alias} attribute,
> except that it is not restricted to pointers.
> 
> Example of use:
> 
> @smallexample
> typedef int __attribute__((__typeless_storage__)) int_a;
> 
> int
> main (void)
> @{
>    int_a a = 0x12345678;
>    short *b = (short *) &a;
> 
>    b[1] = 0;
> 
>    if (a == 0x12345678)
>      abort();
> 
>    exit(0);
> @}
> @end smallexample

Seriously, do not suggest such a broken case.  There's a union to
do this example portably.

Richard.
Richard Biener April 7, 2017, 6:54 a.m. UTC | #24
On Thu, 6 Apr 2017, Bernd Edlinger wrote:

> On 04/06/17 09:47, Richard Biener wrote:
> > On Wed, 5 Apr 2017, Bernd Edlinger wrote:
> >
> >> On 04/05/17 19:22, Bernd Edlinger wrote:
> >>> On 04/05/17 18:08, Jakub Jelinek wrote:
> >>>
> >>> Yes, exactly.  I really want to reach the deadline for gcc-7.
> >>> Fixing the name is certainly the most important first step,
> >>> and if everybody agrees on "typeless_storage", for the name
> >>> I can start with adjusting the name, and look into how
> >>> to use a spare type-flag that should be a mechanical change.
> >>>
> >>
> >> Jakub, I just renamed the attribute and reworked the patch
> >> as you suggested, reg-testing is not yet completed, but
> >> it looks good so far.  I also added a few more tests.
> >>
> >> I have changed the documentation as Richi suggested, but
> >> I am not too sure what to say here.
> >
> > The alias.c changes are not sufficient.  I think what you want is
> > sth like
> >
> > Index: gcc/alias.c
> > ===================================================================
> > --- gcc/alias.c	(revision 246678)
> > +++ gcc/alias.c	(working copy)
> > @@ -136,6 +136,9 @@ struct GTY(()) alias_set_entry {
> >    bool is_pointer;
> >    /* Nonzero if is_pointer or if one of childs have has_pointer set.  */
> >    bool has_pointer;
> > +  /* Nonzero if we have a child serving as typeless storage (or are
> > +     such storage ourselves).  */
> > +  bool has_typeless_storage;
> >
> >    /* The children of the alias set.  These are not just the immediate
> >       children, but, in fact, all descendants.  So, if we have:
> > @@ -419,7 +422,8 @@ alias_set_subset_of (alias_set_type set1
> >    /* Check if set1 is a subset of set2.  */
> >    ase2 = get_alias_set_entry (set2);
> >    if (ase2 != 0
> > -      && (ase2->has_zero_child
> > +      && (ase2->has_typeless_storage
> > +	  || ase2->has_zero_child
> >  	  || (ase2->children && ase2->children->get (set1))))
> >      return true;
> >
> 
> I think get_alias_set(t) will return 0 for typeless_storage
> types, and therefore has_zero_child will be set anyway.
> I think both mean the same thing in the end, but it depends on
> what typeless_storage should actually mean, and we have
> not yet the same idea about it.

But has_zero_child does not do what we like it to because otherwise
in the PR using the char[] array member would have worked!

has_zero_child doesn't do that on purpose of course, but this means
returning alias-set zero for the typeless storage _member_ doesn't
suffice.

> > @@ -825,6 +829,7 @@ init_alias_set_entry (alias_set_type set
> >    ase->has_zero_child = false;
> >    ase->is_pointer = false;
> >    ase->has_pointer = false;
> > +  ase->has_typeless_storage = false;
> >    gcc_checking_assert (!get_alias_set_entry (set));
> >    (*alias_sets)[set] = ase;
> >    return ase;
> > @@ -955,6 +960,7 @@ get_alias_set (tree t)
> >       Just be pragmatic here and make sure the array and its element
> >       type get the same alias set assigned.  */
> >    else if (TREE_CODE (t) == ARRAY_TYPE
> > +	   && ! TYPE_TYPELESS_STORAGE (t)
> >  	   && (!TYPE_NONALIASED_COMPONENT (t)
> >  	       || TYPE_STRUCTURAL_EQUALITY_P (t)))
> >      set = get_alias_set (TREE_TYPE (t));
> > @@ -1094,6 +1100,15 @@ get_alias_set (tree t)
> >
> >    TYPE_ALIAS_SET (t) = set;
> >
> > +  if (TREE_CODE (t) == ARRAY_TYPE
> > +      && TYPE_TYPELESS_STORAGE (t))
> > +    {
> > +      alias_set_entry *ase = get_alias_set_entry (set);
> > +      if (!ase)
> > +	ase = init_alias_set_entry (set);
> > +      ase->has_typeless_storage = true;
> > +    }
> > +
> >    /* If this is an aggregate type or a complex type, we must record any
> >       component aliasing information.  */
> >    if (AGGREGATE_TYPE_P (t) || TREE_CODE (t) == COMPLEX_TYPE)
> > @@ -1173,6 +1188,8 @@ record_alias_subset (alias_set_type supe
> >  	    superset_entry->has_zero_child = true;
> >            if (subset_entry->has_pointer)
> >  	    superset_entry->has_pointer = true;
> > +	  if (subset_entry->has_typeless_storage)
> > +	    superset_entry->has_typeless_storage = true;
> >
> >  	  if (subset_entry->children)
> >  	    {
> >
> >
> > please also restrict TYPE_TYPELESS_STORAGE to ARRAY_TYPEs (otherwise
> > more complications will arise).
> >
> > Index: gcc/cp/class.c
> > ===================================================================
> > --- gcc/cp/class.c      (revision 246678)
> > +++ gcc/cp/class.c      (working copy)
> > @@ -2083,7 +2083,8 @@ fixup_attribute_variants (tree t)
> >    tree attrs = TYPE_ATTRIBUTES (t);
> >    unsigned align = TYPE_ALIGN (t);
> >    bool user_align = TYPE_USER_ALIGN (t);
> > -  bool may_alias = lookup_attribute ("may_alias", attrs);
> > +  bool may_alias = TYPE_TYPELESS_STORAGE (t)
> > +                  || lookup_attribute ("may_alias", attrs);
> >
> >    if (may_alias)
> >      fixup_may_alias (t);
> > @@ -7345,6 +7348,12 @@ finish_struct_1 (tree t)
> >       the class or perform any other required target modifications.  */
> >    targetm.cxx.adjust_class_at_definition (t);
> >
> > +  if (cxx_dialect >= cxx1z && cxx_type_contains_byte_buffer (t))
> > +    {
> > +      TYPE_TYPELESS_STORAGE (t) = 1;
> > +      fixup_attribute_variants (t);
> > ...
> >
> > I don't think you need all this given alias.c only looks at
> > TYPE_MAIN_VARIANTs.
> 
> I wanted to be able to declare a int __attribute__((typeless_storage))
> as in the test case, and the sample in the spec.  And that
> information is not in the TYPE_MAIN_VARIANT.  Therefore I look for
> typeless_storage before "t = TYPE_MAIN_VARIANT (t)".

As I said I believe this is a useless feature.  If you want something
typeless then the underlying type doesn't matter so we can as well
force it to be an array of char.  Makes our life simpler.  And
even makes the code portable to compilers that treat arrays of char
conservatively.

> >
> > Index: gcc/cp/decl.c
> > ===================================================================
> > --- gcc/cp/decl.c       (revision 246678)
> > +++ gcc/cp/decl.c       (working copy)
> > @@ -14081,10 +14081,11 @@ start_enum (tree name, tree enumtype, tree
> > underly
> >           enumtype = pushtag (name, enumtype, /*tag_scope=*/ts_current);
> >
> >           /* std::byte aliases anything.  */
> > -         if (enumtype != error_mark_node
> > +         if (cxx_dialect >= cxx1z
> > +             && enumtype != error_mark_node
> >               && TYPE_CONTEXT (enumtype) == std_node
> >               && !strcmp ("byte", TYPE_NAME_STRING (enumtype)))
> > -           TYPE_ALIAS_SET (enumtype) = 0;
> > +           TYPE_TYPELESS_STORAGE (enumtype) = 1;
> >         }
> >        else
> >
> > not needed (but also not sufficient - you need to handle arrays of
> > byte somewhere).
> 
> See cxx_type_contains_byte_buffer: this function looks recursively into
> structures and unions, and returns the information if the beast
> contains an array of unsigned char or std::byte.

But with a properly designed middle-end feature that's not needed.

There's technically no reason to pessimize TBAA for anything but
the typeless storage member of a structure.

> >
> > @@ -1491,6 +1491,7 @@ struct GTY(()) tree_type_common {
> >    unsigned needs_constructing_flag : 1;
> >    unsigned transparent_aggr_flag : 1;
> >    unsigned restrict_flag : 1;
> > +  unsigned typeless_storage_flag : 1;
> >    unsigned contains_placeholder_bits : 2;
> >
> >    ENUM_BITFIELD(machine_mode) mode : 8;
> >
> > bits are grouped in groups of 8 bits, this breaks it.
> >
> 
> Oh..., does this explain the problems that I had with this version???

No, just "cosmetics".

> > @@ -8041,7 +8041,8 @@ build_pointer_type_for_mode (tree to_type, machine
> >
> >    /* If the pointed-to type has the may_alias attribute set, force
> >       a TYPE_REF_CAN_ALIAS_ALL pointer to be generated.  */
> > -  if (lookup_attribute ("may_alias", TYPE_ATTRIBUTES (to_type)))
> > +  if (TYPE_TYPELESS_STORAGE (to_type)
> > +      || lookup_attribute ("may_alias", TYPE_ATTRIBUTES (to_type)))
> >      can_alias_all = true;
> >
> >    /* In some cases, languages will have things that aren't a POINTER_TYPE
> > @@ -8110,7 +8111,8 @@ build_reference_type_for_mode (tree to_type, machi
> >
> >    /* If the pointed-to type has the may_alias attribute set, force
> >       a TYPE_REF_CAN_ALIAS_ALL pointer to be generated.  */
> > -  if (lookup_attribute ("may_alias", TYPE_ATTRIBUTES (to_type)))
> > +  if (TYPE_TYPELESS_STORAGE (to_type)
> > +      || lookup_attribute ("may_alias", TYPE_ATTRIBUTES (to_type)))
> >      can_alias_all = true;
> >
> >    /* In some cases, languages will have things that aren't a
> >
> > not needed.
> >
> 
> You mean, because the get_alias_set (to_type) will be 0 anyways,
> and can_alias_all wont change the semantic?

Well, typeless_storage and may_alias are something different.  If
you require the above then your implementation of typeless_storage
is broken.

Richard.

> 
> Bernd.
> 
> > +/* Nonzero if the type should behave like a character type
> > +   with respect to aliasing sementics.  */
> > +#define TYPE_TYPELESS_STORAGE(NODE) \
> > +  (TYPE_CHECK (NODE)->type_common.typeless_storage_flag)
> >
> > ARRAY_TYPE_CHECK (NODE)->
> >
> > Richard.
> >
> 
>
Florian Weimer April 7, 2017, 6:56 a.m. UTC | #25
On 04/06/2017 09:16 PM, Richard Biener wrote:
> On April 6, 2017 8:12:29 PM GMT+02:00, Bernd Edlinger <bernd.edlinger@hotmail.de> wrote:
>> But isn't the effective type changed by the assignment b[1] = 0;
>> as described in 6.5(6):
>> "If a value is stored into an object having no declared type through an
>> lvalue having a type that is not a character type, then the type of the
>> lvalue becomes the effective type of the object for that access and for
>> subsequent accesses that do not modify the stored value."
>
> Yes.  I think the example is valid.  At least GCCs memory model makes it so.

As far as I understand the standard, C does not permit changing the 
effective type of an object if it has a declared type (at least not 
without a union).  If GCC supports it, that's an undocumented GCC extension.

Thanks,
Florian
Richard Biener April 7, 2017, 8:01 a.m. UTC | #26
On Fri, 7 Apr 2017, Florian Weimer wrote:

> On 04/06/2017 09:16 PM, Richard Biener wrote:
> > On April 6, 2017 8:12:29 PM GMT+02:00, Bernd Edlinger
> > <bernd.edlinger@hotmail.de> wrote:
> > > But isn't the effective type changed by the assignment b[1] = 0;
> > > as described in 6.5(6):
> > > "If a value is stored into an object having no declared type through an
> > > lvalue having a type that is not a character type, then the type of the
> > > lvalue becomes the effective type of the object for that access and for
> > > subsequent accesses that do not modify the stored value."
> > 
> > Yes.  I think the example is valid.  At least GCCs memory model makes it so.
> 
> As far as I understand the standard, C does not permit changing the effective
> type of an object if it has a declared type (at least not without a union).
> If GCC supports it, that's an undocumented GCC extension.

The GCC middle-end supports it because C++ supports it and there is no
way for the C FE to tell the middle-end that this is not valid.

Richard.
Bernd Edlinger April 7, 2017, 12:58 p.m. UTC | #27
On 04/07/17 08:47, Richard Biener wrote:
> On Thu, 6 Apr 2017, Bernd Edlinger wrote:
>
>> On 04/06/17 21:05, Florian Weimer wrote:
>>> On 04/06/2017 08:49 PM, Bernd Edlinger wrote:
>>>
>>>> For instance how do you "declare an object without a declared type"?
>>>
>>> malloc and other allocation functions return pointers to objects without
>>> a declared type.
>>>
>>
>> Thanks Florian,
>>
>> this discussion is very helpful.
>>
>> How about this for the documentation:
>>
>> @item typeless_storage
>> @cindex @code{typeless_storage} type attribute
>> In the context of section 6.5 paragraph 6 of the C11 standard,
>> an object of this type behaves as if it has no declared type.
>> In the context of section 6.5 paragraph 7 of the C11 standard,
>> an object or a pointer if this type behaves as if it were a
>> character type.
>> This is attribute is similar to the @code{may_alias} attribute,
>> except that it is not restricted to pointers.
>>
>> Example of use:
>>
>> @smallexample
>> typedef int __attribute__((__typeless_storage__)) int_a;
>>
>> int
>> main (void)
>> @{
>>    int_a a = 0x12345678;
>>    short *b = (short *) &a;
>>
>>    b[1] = 0;
>>
>>    if (a == 0x12345678)
>>      abort();
>>
>>    exit(0);
>> @}
>> @end smallexample
>
> Seriously, do not suggest such broken case.  There's a union to
> do this example portably.
>

Well, it is just a mod of the other example above in
the documentation of may_alias:

typedef short __attribute__((__may_alias__)) short_a;

int
main (void)
@{
   int a = 0x12345678;
   short_a *b = (short_a *) &a;

   b[1] = 0;

   if (a == 0x12345678)
     abort();

   exit(0);
@}
@end smallexample

I just moved the attribute from "b" to "a", and that
is what the C++ people want to do as well, just they
call it a "class int_a { std::byte x[4]; }".

I personally like to have the symmetry between the
two concepts here, because it helps to understand the
differences.


Bernd.

> Richard.
>
Bernd Edlinger April 7, 2017, 1:37 p.m. UTC | #28
On 04/07/17 08:54, Richard Biener wrote:
> On Thu, 6 Apr 2017, Bernd Edlinger wrote:
>> I think get_alias_set(t) will return 0 for typeless_storage
>> types, and therefore has_zero_child will be set anyway.
>> I think both mean the same thing in the end, but it depends on
>> what typeless_storage should actually mean, and we have
>> not yet the same idea about it.
>
> But has_zero_child does not do what we like it to because otherwise
> in the PR using the char[] array member would have worked!
>
> has_zero_child doesn't do that on purpose of course, but this means
> returing alias-set zero for the typeless storage _member_ doesn't
> suffice.
>

I see you have a certain idea how to solve the C++17 issue.
And yes, I apologize, if I tried to pee on your tree :)

What you propose is I think the following:
The C++ FE sets TYPE_TYPELESS_STORAGE on std::byte
and on "unsigned char" if the language dialect is cxx17
and the TBAA does all the rest.

What I propose is as follows:
The TYPE_TYPELESS_STORAGE is a generic attribute, it
can be set on any type, and in the TBAA the attribute
does not squirrel around at all.  If it is on a type,
then all DECLs with this type get the alias set 0.
If it is on a member of a struct, that means no
more than if the struct has a char member: it
sets has_zero_child, which I do not want to mean
anything other than it did before.

The C++ FE does the business logic here, in deciding
where to distribute the TYPE_TYPELESS_STORAGE flags.

in this example
class A {
   class B {
     std::byte x[5];
   } b;
};

std::byte, class B, and class A would get the
TYPE_TYPELESS_STORAGE flag set by the C++FE if
the language dialect is cxx17 or above,
so that you can place anything into any object
of class A and class B, and of type std::byte.

but in this example
class B {
   std::byte x;
};

only std::byte would get the TYPE_TYPELESS_STORAGE
flag, so you cannot put anything into an object
of class B, just into an object of std::byte.



>>
>> I wanted to be able to declare a int __attribute__((typeless_storage))
>> as in the test case, and the sample in the spec.  And that
>> information is not in the TYPE_MAIN_VARIANT.  Therefore I look for
>> typeless_storage before "t = TYPE_MAIN_VARIANT (t)".
>
> As I said I believe this is a useless feature.  If you want something
> typeless then the underlying type doesn't matter so we can as well
> force it to be an array of char.  Makes our life simpler.  And
> even makes the code portable to compilers that treat arrays of char
> conservatively.
>

I just learned that the C11 standard does not guarantee that, and also
an array of char does not provide the necessary alignment per se, at
least without alignment attributes.

>>
>> See cxx_type_contains_byte_buffer: this function looks recursively into
>> structures and unions, and returns the information if the beast
>> contains an array of unsigned char or std::byte.
>
> But with a properly designed middle-end feature that's not needed.
>
> There's technically no reason to pessimize TBAA for anything but
> the typeless storage member of a structure.
>

Yes, it is just a matter of taste, and of whether you want the
middle end to be flexible here or whether everything should work
without user intervention.


>>>
>>> @@ -1491,6 +1491,7 @@ struct GTY(()) tree_type_common {
>>>    unsigned needs_constructing_flag : 1;
>>>    unsigned transparent_aggr_flag : 1;
>>>    unsigned restrict_flag : 1;
>>> +  unsigned typeless_storage_flag : 1;
>>>    unsigned contains_placeholder_bits : 2;
>>>
>>>    ENUM_BITFIELD(machine_mode) mode : 8;
>>>
>>> bits are grouped in groups of 8 bits, this breaks it.
>>>
>>
>> Oh..., does this explain the problems that I had with this version???
>
> No, just "cosmetics".
>
>>> @@ -8041,7 +8041,8 @@ build_pointer_type_for_mode (tree to_type, machine
>>>
>>>    /* If the pointed-to type has the may_alias attribute set, force
>>>       a TYPE_REF_CAN_ALIAS_ALL pointer to be generated.  */
>>> -  if (lookup_attribute ("may_alias", TYPE_ATTRIBUTES (to_type)))
>>> +  if (TYPE_TYPELESS_STORAGE (to_type)
>>> +      || lookup_attribute ("may_alias", TYPE_ATTRIBUTES (to_type)))
>>>      can_alias_all = true;
>>>
>>>    /* In some cases, languages will have things that aren't a POINTER_TYPE
>>> @@ -8110,7 +8111,8 @@ build_reference_type_for_mode (tree to_type, machi
>>>
>>>    /* If the pointed-to type has the may_alias attribute set, force
>>>       a TYPE_REF_CAN_ALIAS_ALL pointer to be generated.  */
>>> -  if (lookup_attribute ("may_alias", TYPE_ATTRIBUTES (to_type)))
>>> +  if (TYPE_TYPELESS_STORAGE (to_type)
>>> +      || lookup_attribute ("may_alias", TYPE_ATTRIBUTES (to_type)))
>>>      can_alias_all = true;
>>>
>>>    /* In some cases, languages will have things that aren't a
>>>
>>> not needed.
>>>
>>
>> You mean, because the get_alias_set (to_type) will be 0 anyway,
>> and can_alias_all won't change the semantics?
>
> Well, typeless_storage and may_alias are something different.  If
> you require the above then your implementation of typeless_storage
> is broken.
>

You are right, the hunk above is actually unnecessary.

> Richard.
>
>>
>> Bernd.
>>
>>> +/* Nonzero if the type should behave like a character type
>>> +   with respect to aliasing semantics.  */
>>> +#define TYPE_TYPELESS_STORAGE(NODE) \
>>> +  (TYPE_CHECK (NODE)->type_common.typeless_storage_flag)
>>>
>>> ARRAY_TYPE_CHECK (NODE)->
>>>
>>> Richard.
>>>
>>
>>
>
Richard Biener April 7, 2017, 3:10 p.m. UTC | #29
On April 7, 2017 3:37:30 PM GMT+02:00, Bernd Edlinger <bernd.edlinger@hotmail.de> wrote:
>On 04/07/17 08:54, Richard Biener wrote:
>> On Thu, 6 Apr 2017, Bernd Edlinger wrote:
>>> I think get_alias_set(t) will return 0 for typeless_storage
>>> types, and therefore has_zero_child will be set anyway.
>>> I think both mean the same thing in the end, but it depends on
>>> what typeless_storage should actually mean, and we have
>>> not yet the same idea about it.
>>
>> But has_zero_child does not do what we like it to because otherwise
>> in the PR using the char[] array member would have worked!
>>
>> has_zero_child doesn't do that on purpose of course, but this means
>> returning alias-set zero for the typeless storage _member_ doesn't
>> suffice.
>>
>
>I see you have a certain idea how to solve the C++17 issue.
>And yes, I apologize, if I tried to pee on your tree :)

We do need to support this part of the C++ standard.  For other user code may_alias suffices, and I see no reason to hastily invent something new without a single convincing testcase.  GCC/language extensions should not be added without a good reason.

I didn't propose to expose the type flag to users at all.

Richard.

>What you propose is I think the following:
>The C++ FE sets TYPE_TYPELESS_STORAGE a std::byte
>and on "unsigned char" if the language dialect is cxx17
>and the TBAA makes all the rest.
>
>What I propose is as follows:
>The TYPE_TYPELESS_STORAGE is a generic attribute, it
>can be set on any type, and in the TBAA the attribute
>does not squirrel around at all.  If it is on a type,
>then all DECLs with this type get the alias set 0.
>If it is on a member of a struct that does not mean
>more than if the struct has a char member this it
>sets has_zero_child, which I do not want to mean
>anything else than before.
>
>The C++ FE does the business logic here, in deciding
>where to distribute the TYPE_TYPELESS_STORAGE flags.
>
>in this example
>class A {
>   class B {
>     std::byte x[5];
>   } b;
>};
>
>std::byte, class B, and class A would get the
>TYPE_TYPELESS_STORAGE flag set by the C++FE if
>the language dialect is cxx17 or above,
>so that you can place anything into any object
>of class A and class B, and of type std::byte.
>
>but in this example
>class B {
>   std::byte x;
>};
>
>only std::byte would get the TYPE_TYPELESS_STORAGE
>flag, so you cannot put anything into an object
>of class B, just into an object of std::byte.
>
>
>
>>>
>>> I wanted to be able to declare an int __attribute__((typeless_storage))
>>> as in the test case, and the sample in the spec.  And that
>>> information is not in the TYPE_MAIN_VARIANT.  Therefore I look for
>>> typeless_storage before "t = TYPE_MAIN_VARIANT (t)".
>>
>> As I said I believe this is a useless feature.  If you want something
>> typeless then the underlying type doesn't matter so we can as well
>> force it to be an array of char.  Makes our life simpler.  And
>> even makes the code portable to compilers that treat arrays of char
>> conservatively.
>>
>
>I just learned that the C11 standard does not guarantee that, and also
>an array of char does not provide the necessary alignment per se, at
>least without alignment attributes.
>
>>>
>>> See cxx_type_contains_byte_buffer: this function looks recursively
>into
>>> structures and unions, and returns the information if the beast
>>> contains an array of unsigned char or std::byte.
>>
>> But with a properly designed middle-end feature that's not needed.
>>
>> There's technically no reason to pessimize TBAA for anything but
>> the typeless storage member of a structure.
>>
>
>Yes, it is just a matter of taste.  And if you want the middle
>end to be flexible here or if everything should work without user
>intervention.
>
>
>>>>
>>>> @@ -1491,6 +1491,7 @@ struct GTY(()) tree_type_common {
>>>>    unsigned needs_constructing_flag : 1;
>>>>    unsigned transparent_aggr_flag : 1;
>>>>    unsigned restrict_flag : 1;
>>>> +  unsigned typeless_storage_flag : 1;
>>>>    unsigned contains_placeholder_bits : 2;
>>>>
>>>>    ENUM_BITFIELD(machine_mode) mode : 8;
>>>>
>>>> bits are grouped in groups of 8 bits, this breaks it.
>>>>
>>>
>>> Oh..., does this explain the problems that I had with this
>version???
>>
>> No, just "cosmetics".
>>
>>>> @@ -8041,7 +8041,8 @@ build_pointer_type_for_mode (tree to_type,
>machine
>>>>
>>>>    /* If the pointed-to type has the may_alias attribute set, force
>>>>       a TYPE_REF_CAN_ALIAS_ALL pointer to be generated.  */
>>>> -  if (lookup_attribute ("may_alias", TYPE_ATTRIBUTES (to_type)))
>>>> +  if (TYPE_TYPELESS_STORAGE (to_type)
>>>> +      || lookup_attribute ("may_alias", TYPE_ATTRIBUTES
>(to_type)))
>>>>      can_alias_all = true;
>>>>
>>>>    /* In some cases, languages will have things that aren't a
>POINTER_TYPE
>>>> @@ -8110,7 +8111,8 @@ build_reference_type_for_mode (tree to_type,
>machi
>>>>
>>>>    /* If the pointed-to type has the may_alias attribute set, force
>>>>       a TYPE_REF_CAN_ALIAS_ALL pointer to be generated.  */
>>>> -  if (lookup_attribute ("may_alias", TYPE_ATTRIBUTES (to_type)))
>>>> +  if (TYPE_TYPELESS_STORAGE (to_type)
>>>> +      || lookup_attribute ("may_alias", TYPE_ATTRIBUTES
>(to_type)))
>>>>      can_alias_all = true;
>>>>
>>>>    /* In some cases, languages will have things that aren't a
>>>>
>>>> not needed.
>>>>
>>>
>>> You mean, because the get_alias_set (to_type) will be 0 anyways,
>>> and can_alias_all won't change the semantics?
>>
>> Well, typeless_storage and may_alias are something different.  If
>> you require the above then your implementation of typeless_storage
>> is broken.
>>
>
>You are right, the hunk above is actually unnecessary.
>
>> Richard.
>>
>>>
>>> Bernd.
>>>
>>>> +/* Nonzero if the type should behave like a character type
>>>> +   with respect to aliasing semantics.  */
>>>> +#define TYPE_TYPELESS_STORAGE(NODE) \
>>>> +  (TYPE_CHECK (NODE)->type_common.typeless_storage_flag)
>>>>
>>>> ARRAY_TYPE_CHECK (NODE)->
>>>>
>>>> Richard.
>>>>
>>>
>>>
>>
Florian Weimer April 11, 2017, 10:43 a.m. UTC | #30
On 04/06/2017 09:12 PM, Richard Biener wrote:
>> Right.  The kernel also has many APIs which return multiple
>> variable-length data blocks, such as getdents64, and many more
>> interfaces in combination with read/recv system calls.  Variable length
>> means that you cannot declare the appropriate type after the first data
>> item, so you technically have to use malloc.
>>
>> POSIX interfaces which exhibit a similar pattern are getpwnam_r and
>> friends, but for them, you can probably use malloc without ill effect
>> (although there are still performance concerns).

> Can you give a concrete example which shows the issue and how typeless_storage helps?

An example is in libffi/src/closures.c:

       char buf[MAXPATHLEN * 3];

       if (getmntent_r (last_mntent, &mnt, buf, sizeof (buf)) == NULL)
         return -1;

The intent is that buf is untyped storage, from which the getmntent_r 
function can allocate objects as needed (instead of using malloc).

Based on your earlier comments, GCC already supports that without any 
further source code annotations.

Thanks,
Florian
Richard Biener April 11, 2017, 10:48 a.m. UTC | #31
On Tue, 11 Apr 2017, Florian Weimer wrote:

> On 04/06/2017 09:12 PM, Richard Biener wrote:
> > > Right.  The kernel also has many APIs which return multiple
> > > variable-length data blocks, such as getdents64, and many more
> > > interfaces in combination with read/recv system calls.  Variable length
> > > 
> > > means that you cannot declare the appropriate type after the first data
> > > 
> > > item, so you technically have to use malloc.
> > > 
> > > POSIX interfaces which exhibit a similar pattern are getpwnam_r and
> > > friends, but for them, you can probably use malloc without ill effect
> > > (although there are still performance concerns).
> 
> > Can you give a concrete example which shows the issue and how
> > typeless_storage helps?
> 
> An example is in libffi/src/closures.c:
> 
>       char buf[MAXPATHLEN * 3];
> 
>       if (getmntent_r (last_mntent, &mnt, buf, sizeof (buf)) == NULL)
>         return -1;
> 
> The intent is that buf is untyped storage, from which the getmntent_r function
> can allocate objects as needed (instead of using malloc).
> 
> Based on your earlier comments, GCC already supports that without any further
> source code annotations.

Yes.

Richard.
diff mbox

Patch

Index: gcc/alias.c
===================================================================
--- gcc/alias.c	(revision 246678)
+++ gcc/alias.c	(working copy)
@@ -136,6 +136,9 @@  struct GTY(()) alias_set_entry {
   bool is_pointer;
   /* Nonzero if is_pointer or if one of childs have has_pointer set.  */
   bool has_pointer;
+  /* Nonzero if we have a child serving as typeless storage (or are
+     such storage ourselves).  */
+  bool has_typeless_storage;
 
   /* The children of the alias set.  These are not just the immediate
      children, but, in fact, all descendants.  So, if we have:
@@ -419,7 +422,8 @@  alias_set_subset_of (alias_set_type set1
   /* Check if set1 is a subset of set2.  */
   ase2 = get_alias_set_entry (set2);
   if (ase2 != 0
-      && (ase2->has_zero_child
+      && (ase2->has_typeless_storage
+	  || ase2->has_zero_child
 	  || (ase2->children && ase2->children->get (set1))))
     return true;
 
@@ -825,6 +829,7 @@  init_alias_set_entry (alias_set_type set
   ase->has_zero_child = false;
   ase->is_pointer = false;
   ase->has_pointer = false;
+  ase->has_typeless_storage = false;
   gcc_checking_assert (!get_alias_set_entry (set));
   (*alias_sets)[set] = ase;
   return ase;
@@ -955,6 +960,7 @@  get_alias_set (tree t)
      Just be pragmatic here and make sure the array and its element
      type get the same alias set assigned.  */
   else if (TREE_CODE (t) == ARRAY_TYPE
+	   && ! TYPE_TYPELESS_STORAGE (t)
 	   && (!TYPE_NONALIASED_COMPONENT (t)
 	       || TYPE_STRUCTURAL_EQUALITY_P (t)))
     set = get_alias_set (TREE_TYPE (t));
@@ -1094,6 +1100,15 @@  get_alias_set (tree t)
 
   TYPE_ALIAS_SET (t) = set;
 
+  if (TREE_CODE (t) == ARRAY_TYPE
+      && TYPE_TYPELESS_STORAGE (t))
+    {
+      alias_set_entry *ase = get_alias_set_entry (set);
+      if (!ase)
+	ase = init_alias_set_entry (set);
+      ase->has_typeless_storage = true;
+    }
+
   /* If this is an aggregate type or a complex type, we must record any
      component aliasing information.  */
   if (AGGREGATE_TYPE_P (t) || TREE_CODE (t) == COMPLEX_TYPE)
@@ -1173,6 +1188,8 @@  record_alias_subset (alias_set_type supe
 	    superset_entry->has_zero_child = true;
           if (subset_entry->has_pointer)
 	    superset_entry->has_pointer = true;
+	  if (subset_entry->has_typeless_storage)
+	    superset_entry->has_typeless_storage = true;
 
 	  if (subset_entry->children)
 	    {


Please also restrict TYPE_TYPELESS_STORAGE to ARRAY_TYPEs (otherwise
more complications will arise).

Index: gcc/cp/class.c
===================================================================
--- gcc/cp/class.c      (revision 246678)
+++ gcc/cp/class.c      (working copy)
@@ -2083,7 +2083,8 @@  fixup_attribute_variants (tree t)
   tree attrs = TYPE_ATTRIBUTES (t);
   unsigned align = TYPE_ALIGN (t);
   bool user_align = TYPE_USER_ALIGN (t);
-  bool may_alias = lookup_attribute ("may_alias", attrs);
+  bool may_alias = TYPE_TYPELESS_STORAGE (t)
+                  || lookup_attribute ("may_alias", attrs);

   if (may_alias)
     fixup_may_alias (t);
@@ -7345,6 +7348,12 @@  finish_struct_1 (tree t)
      the class or perform any other required target modifications.  */
   targetm.cxx.adjust_class_at_definition (t);

+  if (cxx_dialect >= cxx1z && cxx_type_contains_byte_buffer (t))
+    {
+      TYPE_TYPELESS_STORAGE (t) = 1;
+      fixup_attribute_variants (t);
...

I don't think you need all this given alias.c only looks at
TYPE_MAIN_VARIANTs.

Index: gcc/cp/decl.c
===================================================================
--- gcc/cp/decl.c       (revision 246678)
+++ gcc/cp/decl.c       (working copy)
@@ -14081,10 +14081,11 @@  start_enum (tree name, tree enumtype, tree underly
          enumtype = pushtag (name, enumtype, /*tag_scope=*/ts_current);

          /* std::byte aliases anything.  */
-         if (enumtype != error_mark_node
+         if (cxx_dialect >= cxx1z
+             && enumtype != error_mark_node
              && TYPE_CONTEXT (enumtype) == std_node
              && !strcmp ("byte", TYPE_NAME_STRING (enumtype)))
-           TYPE_ALIAS_SET (enumtype) = 0;
+           TYPE_TYPELESS_STORAGE (enumtype) = 1;
        }
       else